Diffstat (limited to 'vnet')
454 files changed, 204156 insertions, 0 deletions
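The Makefile.am added by this commit follows one repeating Automake pattern: each protocol or feature directory appends its sources to libvnet_la_SOURCES and its installed headers to nobase_include_HEADERS, and anything that depends on DPDK sits inside an Automake conditional. A minimal sketch of that pattern, using a hypothetical vnet/foo module purely for illustration (not part of this diff):

    # add the module's sources to the vnet library
    libvnet_la_SOURCES += \
      vnet/foo/foo.c \
      vnet/foo/node.c

    # nobase_ keeps the directory prefix, so this installs
    # to $(includedir)/vnet/foo/foo.h
    nobase_include_HEADERS += \
      vnet/foo/foo.h

    # DPDK-dependent sources build only when configure enabled WITH_DPDK
    if WITH_DPDK
    libvnet_la_SOURCES += \
      vnet/foo/foo_dpdk.c
    endif

The same if WITH_DPDK / endif guard is what wraps the IPSec, DPDK device, and vhost-user sources in the diff below.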
diff --git a/vnet/.gitignore b/vnet/.gitignore new file mode 100644 index 00000000000..b25c15b81fa --- /dev/null +++ b/vnet/.gitignore @@ -0,0 +1 @@ +*~ diff --git a/vnet/Makefile.am b/vnet/Makefile.am new file mode 100644 index 00000000000..ee01bff1ee9 --- /dev/null +++ b/vnet/Makefile.am @@ -0,0 +1,632 @@ +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +AUTOMAKE_OPTIONS = foreign subdir-objects + +AM_CFLAGS = -Wall @DPDK@ @VIRL@ + +libvnet_la_SOURCES = +libvnetplugin_la_SOURCES = +nobase_include_HEADERS = + +######################################## +# Generic stuff +######################################## +libvnet_la_SOURCES += \ + vnet/config.c \ + vnet/interface.c \ + vnet/interface_cli.c \ + vnet/interface_format.c \ + vnet/interface_output.c \ + vnet/misc.c \ + vnet/replication.c \ + vnet/rewrite.c + +nobase_include_HEADERS += \ + vnet/api_errno.h \ + vnet/buffer.h \ + vnet/config.h \ + vnet/global_funcs.h \ + vnet/interface.h \ + vnet/interface_funcs.h \ + vnet/l3_types.h \ + vnet/pipeline.h \ + vnet/replication.h \ + vnet/rewrite.h \ + vnet/vnet.h + +######################################## +# Policer infra +######################################## + +libvnet_la_SOURCES += \ + vnet/policer/node_funcs.c \ + vnet/policer/policer.c \ + vnet/policer/xlate.c + +nobase_include_HEADERS += \ + vnet/policer/police.h \ + vnet/policer/xlate.h + + +######################################## +# Layer 2 protocols go here +######################################## + +######################################## +# Layer 2 protocol: Ethernet +######################################## +libvnet_la_SOURCES += \ + vnet/ethernet/arp.c \ + vnet/ethernet/cli.c \ + vnet/ethernet/format.c \ + vnet/ethernet/init.c \ + vnet/ethernet/interface.c \ + vnet/ethernet/node.c \ + vnet/ethernet/pg.c + +nobase_include_HEADERS += \ + vnet/ethernet/arp_packet.h \ + vnet/ethernet/error.def \ + vnet/ethernet/ethernet.h \ + vnet/ethernet/packet.h \ + vnet/ethernet/types.def + +######################################## +# Layer 2 protocol: Ethernet bridging +######################################## +libvnet_la_SOURCES += \ + vnet/l2/feat_bitmap.c \ + vnet/l2/l2_bd.c \ + vnet/l2/l2_bvi.c \ + vnet/l2/l2_classify.c \ + vnet/l2/l2_efp_filter.c \ + vnet/l2/l2_fib.c \ + vnet/l2/l2_flood.c \ + vnet/l2/l2_fwd.c \ + vnet/l2/l2_input_acl.c \ + vnet/l2/l2_input.c \ + vnet/l2/l2_input_vtr.c \ + vnet/l2/l2_learn.c \ + vnet/l2/l2_output_acl.c \ + vnet/l2/l2_output.c \ + vnet/l2/l2_patch.c \ + vnet/l2/l2_vtr.c \ + vnet/l2/l2_xcrw.c + +nobase_include_HEADERS += \ + vnet/l2/feat_bitmap.h \ + vnet/l2/l2_input.h \ + vnet/l2/l2_output.h \ + vnet/l2/l2_vtr.h \ + vnet/l2/l2_input_vtr.h \ + vnet/l2/l2_efp_filter.h \ + vnet/l2/l2_fwd.h \ + vnet/l2/l2_bd.h \ + vnet/l2/l2_bvi.h \ + vnet/l2/l2_flood.h \ + vnet/l2/l2_fib.h \ + vnet/l2/l2_xcrw.h \ + vnet/l2/l2_classify.h + +######################################## +# Layer 2 protocol: SRP +######################################## +libvnet_la_SOURCES += \ + 
vnet/srp/format.c \ + vnet/srp/interface.c \ + vnet/srp/node.c \ + vnet/srp/pg.c + +nobase_include_HEADERS += \ + vnet/srp/packet.h \ + vnet/srp/srp.h + +######################################## +# Layer 2 protocol: PPP +######################################## +libvnet_la_SOURCES += \ + vnet/ppp/node.c \ + vnet/ppp/pg.c \ + vnet/ppp/ppp.c + +nobase_include_HEADERS += \ + vnet/ppp/error.def \ + vnet/ppp/ppp.h \ + vnet/ppp/packet.h + +######################################## +# Layer 2 protocol: HDLC +######################################## +libvnet_la_SOURCES += \ + vnet/hdlc/node.c \ + vnet/hdlc/pg.c \ + vnet/hdlc/hdlc.c + +nobase_include_HEADERS += \ + vnet/hdlc/error.def \ + vnet/hdlc/hdlc.h \ + vnet/hdlc/packet.h + +######################################## +# Layer 2 protocol: LLC +######################################## +libvnet_la_SOURCES += \ + vnet/llc/llc.c \ + vnet/llc/node.c \ + vnet/llc/pg.c + +nobase_include_HEADERS += \ + vnet/llc/llc.h + +######################################## +# Layer 2 protocol: SNAP +######################################## +libvnet_la_SOURCES += \ + vnet/snap/snap.c \ + vnet/snap/node.c \ + vnet/snap/pg.c + +nobase_include_HEADERS += \ + vnet/snap/snap.h + +######################################## +# Layer 2 / vxlan +######################################## +libvnet_la_SOURCES += \ + vnet/vxlan/vxlan.c \ + vnet/vxlan/encap.c \ + vnet/vxlan/decap.c + +nobase_include_HEADERS += \ + vnet/vxlan/vxlan.h \ + vnet/vxlan/vxlan_packet.h \ + vnet/vxlan/vxlan_error.def + +######################################## +# Layer 2/3 "classify" +######################################## +libvnet_la_SOURCES += \ + vnet/classify/vnet_classify.c \ + vnet/classify/ip_classify.c \ + vnet/classify/input_acl.c \ + vnet/classify/vnet_classify.h + +nobase_include_HEADERS += \ + vnet/classify/vnet_classify.h \ + vnet/classify/input_acl.h + +######################################## +# Layer 3 protocols go here +######################################## + +# vnet/ip/tcp.c \ +# vnet/ip/tcp_format.c \ +# vnet/ip/tcp_init.c \ +# vnet/ip/tcp_pg.c + + +######################################## +# Layer 3 protocol: IP v4/v6 +######################################## +libvnet_la_SOURCES += \ + vnet/ip/format.c \ + vnet/ip/icmp4.c \ + vnet/ip/icmp6.c \ + vnet/ip/ip46_cli.c \ + vnet/ip/ip4_format.c \ + vnet/ip/ip4_forward.c \ + vnet/ip/ip4_hop_by_hop.c \ + vnet/ip/ip4_input.c \ + vnet/ip/ip4_mtrie.c \ + vnet/ip/ip4_pg.c \ + vnet/ip/ip4_source_check.c \ + vnet/ip/ip6_format.c \ + vnet/ip/ip6_forward.c \ + vnet/ip/ip6_hop_by_hop.c \ + vnet/ip/ip6_input.c \ + vnet/ip/ip6_neighbor.c \ + vnet/ip/ip6_pg.c \ + vnet/ip/ip_checksum.c \ + vnet/ip/ip.h \ + vnet/ip/ip_init.c \ + vnet/ip/lookup.c \ + vnet/ip/udp_format.c \ + vnet/ip/udp_init.c \ + vnet/ip/udp_local.c \ + vnet/ip/udp_pg.c \ + vnet/ip/ip_input_acl.c \ + vnet/ip/ip_frag.c + +nobase_include_HEADERS += \ + vnet/ip/format.h \ + vnet/ip/icmp46_packet.h \ + vnet/ip/icmp6.h \ + vnet/ip/igmp_packet.h \ + vnet/ip/ip.h \ + vnet/ip/ip4.h \ + vnet/ip/ip4_mtrie.h \ + vnet/ip/ip4_error.h \ + vnet/ip/ip4_packet.h \ + vnet/ip/ip6.h \ + vnet/ip/ip6_error.h \ + vnet/ip/ip6_hop_by_hop.h \ + vnet/ip/ip6_hop_by_hop_packet.h \ + vnet/ip/ip6_packet.h \ + vnet/ip/lookup.h \ + vnet/ip/ip_packet.h \ + vnet/ip/ports.def \ + vnet/ip/protocols.def \ + vnet/ip/tcp.h \ + vnet/ip/tcp_packet.h \ + vnet/ip/udp.h \ + vnet/ip/udp_error.def \ + vnet/ip/udp_packet.h + +######################################## +# Layer 3 protocol: IPSec +######################################## 
+if WITH_DPDK +libvnet_la_SOURCES += \ + vnet/ipsec/ipsec.c \ + vnet/ipsec/ipsec_cli.c \ + vnet/ipsec/ipsec_format.c \ + vnet/ipsec/ipsec_output.c \ + vnet/ipsec/ipsec_input.c \ + vnet/ipsec/ipsec_if.c \ + vnet/ipsec/ipsec_if_in.c \ + vnet/ipsec/ipsec_if_out.c \ + vnet/ipsec/esp_encrypt.c \ + vnet/ipsec/esp_decrypt.c \ + vnet/ipsec/ikev2.c \ + vnet/ipsec/ikev2_crypto.c \ + vnet/ipsec/ikev2_cli.c \ + vnet/ipsec/ikev2_payload.c \ + vnet/ipsec/ikev2_format.c + +nobase_include_HEADERS += \ + vnet/ipsec/ipsec.h \ + vnet/ipsec/esp.h \ + vnet/ipsec/ikev2.h \ + vnet/ipsec/ikev2_priv.h +endif + +######################################## +# Layer 3 protocol: osi +######################################## +libvnet_la_SOURCES += \ + vnet/osi/node.c \ + vnet/osi/osi.c \ + vnet/osi/pg.c + +nobase_include_HEADERS += \ + vnet/osi/osi.h + +######################################## +# Layer 3 protocol: MAP +######################################## +libvnet_la_SOURCES += \ + vnet/map/map.c \ + vnet/map/ip4_map.c \ + vnet/map/ip6_map.c \ + vnet/map/sixrd.c \ + vnet/map/ip4_sixrd.c \ + vnet/map/ip6_sixrd.c \ + vnet/map/ip4_map_t.c \ + vnet/map/ip6_map_t.c + +nobase_include_HEADERS += \ + vnet/map/map.h \ + vnet/map/sixrd.h + +######################################## +# Tunnel protocol: gre +######################################## +libvnet_la_SOURCES += \ + vnet/gre/gre.c \ + vnet/gre/node.c \ + vnet/gre/interface.c \ + vnet/gre/pg.c + +nobase_include_HEADERS += \ + vnet/gre/gre.h \ + vnet/gre/packet.h \ + vnet/gre/error.def + +######################################## +# Tunnel protocol: l2tpv3 +######################################## +libvnet_la_SOURCES += \ + vnet/l2tp/l2tp.c \ + vnet/l2tp/encap.c \ + vnet/l2tp/decap.c \ + vnet/l2tp/pg.c + +nobase_include_HEADERS += \ + vnet/l2tp/l2tp.h \ + vnet/l2tp/packet.h + +######################################## +# Tunnel protocol: gre+mpls +######################################## +libvnet_la_SOURCES += \ + vnet/mpls-gre/mpls.c \ + vnet/mpls-gre/node.c \ + vnet/mpls-gre/interface.c \ + vnet/mpls-gre/policy_encap.c \ + vnet/mpls-gre/pg.c + +nobase_include_HEADERS += \ + vnet/mpls-gre/mpls.h \ + vnet/mpls-gre/packet.h \ + vnet/mpls-gre/error.def + + +######################################## +# Tunnel protocol: nsh-gre +######################################## + +libvnet_la_SOURCES += \ + vnet/nsh-gre/nsh_gre.c \ + vnet/nsh-gre/encap.c \ + vnet/nsh-gre/decap.c + +nobase_include_HEADERS += \ + vnet/nsh-gre/nsh_gre.h \ + vnet/nsh-gre/nsh_gre_packet.h \ + vnet/nsh-gre/nsh_gre_error.def + +######################################## +# Tunnel protocol: nsh-vxlan-gpe +######################################## + +libvnet_la_SOURCES += \ + vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c \ + vnet/nsh-vxlan-gpe/encap.c \ + vnet/nsh-vxlan-gpe/decap.c + +nobase_include_HEADERS += \ + vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h \ + vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h \ + vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def + +######################################## +# Tunnel protocol: lisp-gpe +######################################## + +libvnet_la_SOURCES += \ + vnet/lisp-gpe/lisp_gpe.c \ + vnet/lisp-gpe/encap.c \ + vnet/lisp-gpe/decap.c + +nobase_include_HEADERS += \ + vnet/lisp-gpe/lisp_gpe.h \ + vnet/lisp-gpe/lisp_gpe_packet.h \ + vnet/lisp-gpe/lisp_gpe_error.def + +######################################## +# DHCP client +######################################## +libvnet_la_SOURCES += \ + vnet/dhcp/client.c \ + vnet/dhcp/client.h + +nobase_include_HEADERS += \ + vnet/dhcp/client.h + 
+######################################## +# DHCP proxy +######################################## +libvnet_la_SOURCES += \ + vnet/dhcp/proxy_node.c \ + vnet/dhcp/proxy.h + +nobase_include_HEADERS += \ + vnet/dhcp/packet.h \ + vnet/dhcp/proxy.h \ + vnet/dhcp/proxy_error.def + +######################################## +# ipv6 segment routing +######################################## +libvnet_la_SOURCES += \ + vnet/sr/sr.c + +nobase_include_HEADERS += \ + vnet/sr/sr_packet.h \ + vnet/sr/sr_error.def \ + vnet/sr/sr.h + +######################################## +# CGN +######################################### +libvnet_la_SOURCES += \ + vnet/vcgn/cnat_bulk_port.c \ + vnet/vcgn/cnat_config.c \ + vnet/vcgn/cnat_db_scanner.c \ + vnet/vcgn/cnat_db_v2.c \ + vnet/vcgn/cnat_debug_msg_handler.c \ + vnet/vcgn/cnat_cli_handler.c \ + vnet/vcgn/cnat_global.c \ + vnet/vcgn/cnat_ipv4_udp_inside_input.c \ + vnet/vcgn/cnat_ipv4_udp_inside_input_exceptions.c \ + vnet/vcgn/cnat_ipv4_udp_outside_input.c \ + vnet/vcgn/cnat_ipv4_tcp_inside_input.c \ + vnet/vcgn/cnat_ipv4_tcp_inside_input_exceptions.c \ + vnet/vcgn/cnat_ipv4_tcp_outside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_query_inside_input_exception.c \ + vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c \ + vnet/vcgn/cnat_logging.c \ + vnet/vcgn/cnat_ports.c \ + vnet/vcgn/cnat_util.c \ + vnet/vcgn/cnat_show.c \ + vnet/vcgn/cnat_syslog.c \ + vnet/vcgn/cnat_v4_functions.c \ + vnet/vcgn/index_list.c \ + vnet/vcgn/spp_platform_trace_log.c \ + vnet/vcgn/vcgn_classify.c + +######################################## +# DHCPv6 proxy +######################################## +libvnet_la_SOURCES += \ + vnet/dhcpv6/proxy_node.c + +nobase_include_HEADERS += \ + vnet/dhcpv6/packet.h \ + vnet/dhcpv6/proxy.h \ + vnet/dhcpv6/proxy_error.def + +######################################## +# IPFIX / netflow v10 +######################################## +libvnet_la_SOURCES += \ + vnet/flow/flow_report.c + +nobase_include_HEADERS += \ + vnet/flow/flow_report.h \ + vnet/flow/ipfix_info_elements.h \ + vnet/flow/ipfix_packet.h + +######################################## +# IPFIX sample code +######################################## + +libvnet_la_SOURCES += \ + vnet/flow/flow_report_sample.c + +nobase_include_HEADERS += \ + vnet/flow/flow_report_sample.h + +######################################## +# lawful intercept +######################################## + +libvnet_la_SOURCES += \ + vnet/lawful-intercept/lawful_intercept.c \ + vnet/lawful-intercept/node.c + +nobase_include_HEADERS += \ + vnet/dpdk_replication.h \ + vnet/lawful-intercept/lawful_intercept.h + +######################################## +# Packet generator +######################################## + +libvnet_la_SOURCES += \ + vnet/pg/cli.c \ + vnet/pg/edit.c \ + vnet/pg/init.c \ + vnet/pg/input.c \ + vnet/pg/output.c \ + vnet/pg/stream.c + +nobase_include_HEADERS += \ + vnet/pg/pg.h \ + vnet/pg/edit.h + +######################################## +# Intel DPDK +######################################## +if WITH_DPDK +libvnet_la_SOURCES += \ + vnet/devices/dpdk/dpdk_priv.h \ + vnet/devices/dpdk/device.c \ + vnet/devices/dpdk/init.c \ + vnet/devices/dpdk/node.c \ + vnet/devices/dpdk/threads.c \ + vnet/devices/dpdk/vhost_user.c \ + vnet/devices/dpdk/cli.c + +nobase_include_HEADERS += \ + vnet/devices/dpdk/dpdk.h \ + vnet/devices/dpdk/threads.h +endif + 
+######################################## +# virtio +######################################## + +if WITH_DPDK +libvnet_la_SOURCES += \ + vnet/devices/virtio/vhost-user.c + +nobase_include_HEADERS += \ + vnet/devices/virtio/vhost-user.h +endif WITH_DPDK + +######################################## +# ssvm ethernet +######################################## +libvnet_la_SOURCES += \ + vnet/devices/ssvm/ssvm_eth.c \ + vnet/devices/ssvm/node.c + +nobase_include_HEADERS += \ + vnet/devices/ssvm/ssvm_eth.h + +######################################## +# Unix kernel related +######################################## + +# FIXME: vnet/unix/hgshm.c + +libvnet_la_SOURCES += \ + vnet/unix/gdb_funcs.c \ + vnet/unix/pcap.c \ + vnet/unix/tapcli.c \ + vnet/unix/tuntap.c + +nobase_include_HEADERS += \ + vnet/unix/pcap.h \ + vnet/unix/tuntap.h \ + vnet/unix/tapcli.h + +######################################## +# Plugin client library +######################################## + +libvnetplugin_la_SOURCES += \ + vnet/plugin/p1.c + +nobase_include_HEADERS += \ + vnet/plugin/plugin.h + +lib_LTLIBRARIES = libvnet.la libvnetplugin.la + +noinst_PROGRAMS = vnet_unix + +vnet_unix_SOURCES = \ + example/main_stub.c + +vnet_unix_LDFLAGS = -static +dpdk_libs = + +if WITH_DPDK +dpdk_libs += -l:libdpdk.a +endif + +vnet_unix_LDADD = libvnet.la -l:libvlib_unix.a -l:libvlib.a -l:libvppinfra.a -lpthread -lvlibapi -lvlibmemory -lsvm -lm -ldl $(dpdk_libs) -lrt + +pcap2pg_SOURCES = \ + vnet/unix/pcap2pg.c \ + vnet/unix/pcap.h + +pcap2pg_LDFLAGS = -static +pcap2pg_LDADD = libvnet.la -l:libvppinfra.a -lpthread -lm -ldl + +noinst_PROGRAMS += pcap2pg diff --git a/vnet/Makefile.in b/vnet/Makefile.in new file mode 100644 index 00000000000..82000f4113b --- /dev/null +++ b/vnet/Makefile.in @@ -0,0 +1,2376 @@ +# Makefile.in generated by automake 1.14.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ + +######################################## +# Layer 3 protocol: IPSec +######################################## +@WITH_DPDK_TRUE@am__append_1 = \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_cli.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_format.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_output.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_input.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_if.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_if_in.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_if_out.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/esp_encrypt.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/esp_decrypt.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_crypto.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_cli.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_payload.c \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_format.c + +@WITH_DPDK_TRUE@am__append_2 = \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec.h \ +@WITH_DPDK_TRUE@ vnet/ipsec/esp.h \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2.h \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_priv.h + + +######################################## +# Intel DPDK +######################################## + +######################################## +# virtio +######################################## +@WITH_DPDK_TRUE@am__append_3 = vnet/devices/dpdk/dpdk_priv.h \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/device.c \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/init.c \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/node.c \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/threads.c \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/vhost_user.c \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/cli.c \ +@WITH_DPDK_TRUE@ vnet/devices/virtio/vhost-user.c +@WITH_DPDK_TRUE@am__append_4 = vnet/devices/dpdk/dpdk.h \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/threads.h \ +@WITH_DPDK_TRUE@ vnet/devices/virtio/vhost-user.h +noinst_PROGRAMS = 
vnet_unix$(EXEEXT) pcap2pg$(EXEEXT) +@WITH_DPDK_TRUE@am__append_5 = -l:libdpdk.a +subdir = . +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/configure $(am__configure_deps) depcomp \ + $(am__nobase_include_HEADERS_DIST) compile config.guess \ + config.sub install-sh missing ltmain.sh +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libvnet_la_LIBADD = +am__libvnet_la_SOURCES_DIST = vnet/config.c vnet/interface.c \ + vnet/interface_cli.c vnet/interface_format.c \ + vnet/interface_output.c vnet/misc.c vnet/replication.c \ + vnet/rewrite.c vnet/policer/node_funcs.c \ + vnet/policer/policer.c vnet/policer/xlate.c \ + vnet/ethernet/arp.c vnet/ethernet/cli.c vnet/ethernet/format.c \ + vnet/ethernet/init.c vnet/ethernet/interface.c \ + vnet/ethernet/node.c vnet/ethernet/pg.c vnet/l2/feat_bitmap.c \ + vnet/l2/l2_bd.c vnet/l2/l2_bvi.c vnet/l2/l2_classify.c \ + vnet/l2/l2_efp_filter.c vnet/l2/l2_fib.c vnet/l2/l2_flood.c \ + vnet/l2/l2_fwd.c vnet/l2/l2_input_acl.c vnet/l2/l2_input.c \ + vnet/l2/l2_input_vtr.c vnet/l2/l2_learn.c \ + vnet/l2/l2_output_acl.c vnet/l2/l2_output.c vnet/l2/l2_patch.c \ + vnet/l2/l2_vtr.c vnet/l2/l2_xcrw.c vnet/srp/format.c \ + vnet/srp/interface.c vnet/srp/node.c vnet/srp/pg.c \ + vnet/ppp/node.c vnet/ppp/pg.c vnet/ppp/ppp.c vnet/hdlc/node.c \ + vnet/hdlc/pg.c vnet/hdlc/hdlc.c vnet/llc/llc.c vnet/llc/node.c \ + vnet/llc/pg.c vnet/snap/snap.c vnet/snap/node.c vnet/snap/pg.c \ + vnet/vxlan/vxlan.c vnet/vxlan/encap.c vnet/vxlan/decap.c \ + vnet/classify/vnet_classify.c vnet/classify/ip_classify.c \ + vnet/classify/input_acl.c vnet/classify/vnet_classify.h \ + vnet/ip/format.c vnet/ip/icmp4.c vnet/ip/icmp6.c \ + vnet/ip/ip46_cli.c vnet/ip/ip4_format.c vnet/ip/ip4_forward.c \ + vnet/ip/ip4_hop_by_hop.c vnet/ip/ip4_input.c \ + vnet/ip/ip4_mtrie.c vnet/ip/ip4_pg.c \ + vnet/ip/ip4_source_check.c vnet/ip/ip6_format.c \ + vnet/ip/ip6_forward.c vnet/ip/ip6_hop_by_hop.c \ + 
vnet/ip/ip6_input.c vnet/ip/ip6_neighbor.c vnet/ip/ip6_pg.c \ + vnet/ip/ip_checksum.c vnet/ip/ip.h vnet/ip/ip_init.c \ + vnet/ip/lookup.c vnet/ip/udp_format.c vnet/ip/udp_init.c \ + vnet/ip/udp_local.c vnet/ip/udp_pg.c vnet/ip/ip_input_acl.c \ + vnet/ip/ip_frag.c vnet/ipsec/ipsec.c vnet/ipsec/ipsec_cli.c \ + vnet/ipsec/ipsec_format.c vnet/ipsec/ipsec_output.c \ + vnet/ipsec/ipsec_input.c vnet/ipsec/ipsec_if.c \ + vnet/ipsec/ipsec_if_in.c vnet/ipsec/ipsec_if_out.c \ + vnet/ipsec/esp_encrypt.c vnet/ipsec/esp_decrypt.c \ + vnet/ipsec/ikev2.c vnet/ipsec/ikev2_crypto.c \ + vnet/ipsec/ikev2_cli.c vnet/ipsec/ikev2_payload.c \ + vnet/ipsec/ikev2_format.c vnet/osi/node.c vnet/osi/osi.c \ + vnet/osi/pg.c vnet/map/map.c vnet/map/ip4_map.c \ + vnet/map/ip6_map.c vnet/map/sixrd.c vnet/map/ip4_sixrd.c \ + vnet/map/ip6_sixrd.c vnet/map/ip4_map_t.c vnet/map/ip6_map_t.c \ + vnet/gre/gre.c vnet/gre/node.c vnet/gre/interface.c \ + vnet/gre/pg.c vnet/l2tp/l2tp.c vnet/l2tp/encap.c \ + vnet/l2tp/decap.c vnet/l2tp/pg.c vnet/mpls-gre/mpls.c \ + vnet/mpls-gre/node.c vnet/mpls-gre/interface.c \ + vnet/mpls-gre/policy_encap.c vnet/mpls-gre/pg.c \ + vnet/nsh-gre/nsh_gre.c vnet/nsh-gre/encap.c \ + vnet/nsh-gre/decap.c vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c \ + vnet/nsh-vxlan-gpe/encap.c vnet/nsh-vxlan-gpe/decap.c \ + vnet/lisp-gpe/lisp_gpe.c vnet/lisp-gpe/encap.c \ + vnet/lisp-gpe/decap.c vnet/dhcp/client.c vnet/dhcp/client.h \ + vnet/dhcp/proxy_node.c vnet/dhcp/proxy.h vnet/sr/sr.c \ + vnet/vcgn/cnat_bulk_port.c vnet/vcgn/cnat_config.c \ + vnet/vcgn/cnat_db_scanner.c vnet/vcgn/cnat_db_v2.c \ + vnet/vcgn/cnat_debug_msg_handler.c \ + vnet/vcgn/cnat_cli_handler.c vnet/vcgn/cnat_global.c \ + vnet/vcgn/cnat_ipv4_udp_inside_input.c \ + vnet/vcgn/cnat_ipv4_udp_inside_input_exceptions.c \ + vnet/vcgn/cnat_ipv4_udp_outside_input.c \ + vnet/vcgn/cnat_ipv4_tcp_inside_input.c \ + vnet/vcgn/cnat_ipv4_tcp_inside_input_exceptions.c \ + vnet/vcgn/cnat_ipv4_tcp_outside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_query_inside_input_exception.c \ + vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c \ + vnet/vcgn/cnat_logging.c vnet/vcgn/cnat_ports.c \ + vnet/vcgn/cnat_util.c vnet/vcgn/cnat_show.c \ + vnet/vcgn/cnat_syslog.c vnet/vcgn/cnat_v4_functions.c \ + vnet/vcgn/index_list.c vnet/vcgn/spp_platform_trace_log.c \ + vnet/vcgn/vcgn_classify.c vnet/dhcpv6/proxy_node.c \ + vnet/flow/flow_report.c vnet/flow/flow_report_sample.c \ + vnet/lawful-intercept/lawful_intercept.c \ + vnet/lawful-intercept/node.c vnet/pg/cli.c vnet/pg/edit.c \ + vnet/pg/init.c vnet/pg/input.c vnet/pg/output.c \ + vnet/pg/stream.c vnet/devices/dpdk/dpdk_priv.h \ + vnet/devices/dpdk/device.c vnet/devices/dpdk/init.c \ + vnet/devices/dpdk/node.c vnet/devices/dpdk/threads.c \ + vnet/devices/dpdk/vhost_user.c vnet/devices/dpdk/cli.c \ + vnet/devices/virtio/vhost-user.c vnet/devices/ssvm/ssvm_eth.c \ + vnet/devices/ssvm/node.c vnet/unix/gdb_funcs.c \ + vnet/unix/pcap.c vnet/unix/tapcli.c vnet/unix/tuntap.c +am__dirstamp = $(am__leading_dot)dirstamp +@WITH_DPDK_TRUE@am__objects_1 = vnet/ipsec/ipsec.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_cli.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_format.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_output.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_input.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_if.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_if_in.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ipsec_if_out.lo \ +@WITH_DPDK_TRUE@ 
vnet/ipsec/esp_encrypt.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/esp_decrypt.lo vnet/ipsec/ikev2.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_crypto.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_cli.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_payload.lo \ +@WITH_DPDK_TRUE@ vnet/ipsec/ikev2_format.lo +@WITH_DPDK_TRUE@am__objects_2 = vnet/devices/dpdk/device.lo \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/init.lo \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/node.lo \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/threads.lo \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/vhost_user.lo \ +@WITH_DPDK_TRUE@ vnet/devices/dpdk/cli.lo \ +@WITH_DPDK_TRUE@ vnet/devices/virtio/vhost-user.lo +am_libvnet_la_OBJECTS = vnet/config.lo vnet/interface.lo \ + vnet/interface_cli.lo vnet/interface_format.lo \ + vnet/interface_output.lo vnet/misc.lo vnet/replication.lo \ + vnet/rewrite.lo vnet/policer/node_funcs.lo \ + vnet/policer/policer.lo vnet/policer/xlate.lo \ + vnet/ethernet/arp.lo vnet/ethernet/cli.lo \ + vnet/ethernet/format.lo vnet/ethernet/init.lo \ + vnet/ethernet/interface.lo vnet/ethernet/node.lo \ + vnet/ethernet/pg.lo vnet/l2/feat_bitmap.lo vnet/l2/l2_bd.lo \ + vnet/l2/l2_bvi.lo vnet/l2/l2_classify.lo \ + vnet/l2/l2_efp_filter.lo vnet/l2/l2_fib.lo vnet/l2/l2_flood.lo \ + vnet/l2/l2_fwd.lo vnet/l2/l2_input_acl.lo vnet/l2/l2_input.lo \ + vnet/l2/l2_input_vtr.lo vnet/l2/l2_learn.lo \ + vnet/l2/l2_output_acl.lo vnet/l2/l2_output.lo \ + vnet/l2/l2_patch.lo vnet/l2/l2_vtr.lo vnet/l2/l2_xcrw.lo \ + vnet/srp/format.lo vnet/srp/interface.lo vnet/srp/node.lo \ + vnet/srp/pg.lo vnet/ppp/node.lo vnet/ppp/pg.lo vnet/ppp/ppp.lo \ + vnet/hdlc/node.lo vnet/hdlc/pg.lo vnet/hdlc/hdlc.lo \ + vnet/llc/llc.lo vnet/llc/node.lo vnet/llc/pg.lo \ + vnet/snap/snap.lo vnet/snap/node.lo vnet/snap/pg.lo \ + vnet/vxlan/vxlan.lo vnet/vxlan/encap.lo vnet/vxlan/decap.lo \ + vnet/classify/vnet_classify.lo vnet/classify/ip_classify.lo \ + vnet/classify/input_acl.lo vnet/ip/format.lo vnet/ip/icmp4.lo \ + vnet/ip/icmp6.lo vnet/ip/ip46_cli.lo vnet/ip/ip4_format.lo \ + vnet/ip/ip4_forward.lo vnet/ip/ip4_hop_by_hop.lo \ + vnet/ip/ip4_input.lo vnet/ip/ip4_mtrie.lo vnet/ip/ip4_pg.lo \ + vnet/ip/ip4_source_check.lo vnet/ip/ip6_format.lo \ + vnet/ip/ip6_forward.lo vnet/ip/ip6_hop_by_hop.lo \ + vnet/ip/ip6_input.lo vnet/ip/ip6_neighbor.lo vnet/ip/ip6_pg.lo \ + vnet/ip/ip_checksum.lo vnet/ip/ip_init.lo vnet/ip/lookup.lo \ + vnet/ip/udp_format.lo vnet/ip/udp_init.lo vnet/ip/udp_local.lo \ + vnet/ip/udp_pg.lo vnet/ip/ip_input_acl.lo vnet/ip/ip_frag.lo \ + $(am__objects_1) vnet/osi/node.lo vnet/osi/osi.lo \ + vnet/osi/pg.lo vnet/map/map.lo vnet/map/ip4_map.lo \ + vnet/map/ip6_map.lo vnet/map/sixrd.lo vnet/map/ip4_sixrd.lo \ + vnet/map/ip6_sixrd.lo vnet/map/ip4_map_t.lo \ + vnet/map/ip6_map_t.lo vnet/gre/gre.lo vnet/gre/node.lo \ + vnet/gre/interface.lo vnet/gre/pg.lo vnet/l2tp/l2tp.lo \ + vnet/l2tp/encap.lo vnet/l2tp/decap.lo vnet/l2tp/pg.lo \ + vnet/mpls-gre/mpls.lo vnet/mpls-gre/node.lo \ + vnet/mpls-gre/interface.lo vnet/mpls-gre/policy_encap.lo \ + vnet/mpls-gre/pg.lo vnet/nsh-gre/nsh_gre.lo \ + vnet/nsh-gre/encap.lo vnet/nsh-gre/decap.lo \ + vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.lo \ + vnet/nsh-vxlan-gpe/encap.lo vnet/nsh-vxlan-gpe/decap.lo \ + vnet/lisp-gpe/lisp_gpe.lo vnet/lisp-gpe/encap.lo \ + vnet/lisp-gpe/decap.lo vnet/dhcp/client.lo \ + vnet/dhcp/proxy_node.lo vnet/sr/sr.lo \ + vnet/vcgn/cnat_bulk_port.lo vnet/vcgn/cnat_config.lo \ + vnet/vcgn/cnat_db_scanner.lo vnet/vcgn/cnat_db_v2.lo \ + vnet/vcgn/cnat_debug_msg_handler.lo \ + vnet/vcgn/cnat_cli_handler.lo vnet/vcgn/cnat_global.lo \ + 
vnet/vcgn/cnat_ipv4_udp_inside_input.lo \ + vnet/vcgn/cnat_ipv4_udp_inside_input_exceptions.lo \ + vnet/vcgn/cnat_ipv4_udp_outside_input.lo \ + vnet/vcgn/cnat_ipv4_tcp_inside_input.lo \ + vnet/vcgn/cnat_ipv4_tcp_inside_input_exceptions.lo \ + vnet/vcgn/cnat_ipv4_tcp_outside_input.lo \ + vnet/vcgn/cnat_ipv4_icmp_query_inside_input.lo \ + vnet/vcgn/cnat_ipv4_icmp_query_inside_input_exception.lo \ + vnet/vcgn/cnat_ipv4_icmp_query_outside_input.lo \ + vnet/vcgn/cnat_ipv4_icmp_error_inside_input.lo \ + vnet/vcgn/cnat_ipv4_icmp_error_outside_input.lo \ + vnet/vcgn/cnat_logging.lo vnet/vcgn/cnat_ports.lo \ + vnet/vcgn/cnat_util.lo vnet/vcgn/cnat_show.lo \ + vnet/vcgn/cnat_syslog.lo vnet/vcgn/cnat_v4_functions.lo \ + vnet/vcgn/index_list.lo vnet/vcgn/spp_platform_trace_log.lo \ + vnet/vcgn/vcgn_classify.lo vnet/dhcpv6/proxy_node.lo \ + vnet/flow/flow_report.lo vnet/flow/flow_report_sample.lo \ + vnet/lawful-intercept/lawful_intercept.lo \ + vnet/lawful-intercept/node.lo vnet/pg/cli.lo vnet/pg/edit.lo \ + vnet/pg/init.lo vnet/pg/input.lo vnet/pg/output.lo \ + vnet/pg/stream.lo $(am__objects_2) \ + vnet/devices/ssvm/ssvm_eth.lo vnet/devices/ssvm/node.lo \ + vnet/unix/gdb_funcs.lo vnet/unix/pcap.lo vnet/unix/tapcli.lo \ + vnet/unix/tuntap.lo +libvnet_la_OBJECTS = $(am_libvnet_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libvnetplugin_la_LIBADD = +am_libvnetplugin_la_OBJECTS = vnet/plugin/p1.lo +libvnetplugin_la_OBJECTS = $(am_libvnetplugin_la_OBJECTS) +PROGRAMS = $(noinst_PROGRAMS) +am_pcap2pg_OBJECTS = vnet/unix/pcap2pg.$(OBJEXT) +pcap2pg_OBJECTS = $(am_pcap2pg_OBJECTS) +pcap2pg_DEPENDENCIES = libvnet.la +pcap2pg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(pcap2pg_LDFLAGS) $(LDFLAGS) -o $@ +am_vnet_unix_OBJECTS = example/main_stub.$(OBJEXT) +vnet_unix_OBJECTS = $(am_vnet_unix_OBJECTS) +am__DEPENDENCIES_1 = +am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) +vnet_unix_DEPENDENCIES = libvnet.la $(am__DEPENDENCIES_2) +vnet_unix_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(vnet_unix_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libvnet_la_SOURCES) $(libvnetplugin_la_SOURCES) \ + $(pcap2pg_SOURCES) $(vnet_unix_SOURCES) +DIST_SOURCES = 
$(am__libvnet_la_SOURCES_DIST) \ + $(libvnetplugin_la_SOURCES) $(pcap2pg_SOURCES) \ + $(vnet_unix_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__nobase_include_HEADERS_DIST = vnet/api_errno.h vnet/buffer.h \ + vnet/config.h vnet/global_funcs.h vnet/interface.h \ + vnet/interface_funcs.h vnet/l3_types.h vnet/pipeline.h \ + vnet/replication.h vnet/rewrite.h vnet/vnet.h \ + vnet/policer/police.h vnet/policer/xlate.h \ + vnet/ethernet/arp_packet.h vnet/ethernet/error.def \ + vnet/ethernet/ethernet.h vnet/ethernet/packet.h \ + vnet/ethernet/types.def vnet/l2/feat_bitmap.h \ + vnet/l2/l2_input.h vnet/l2/l2_output.h vnet/l2/l2_vtr.h \ + vnet/l2/l2_input_vtr.h vnet/l2/l2_efp_filter.h \ + vnet/l2/l2_fwd.h vnet/l2/l2_bd.h vnet/l2/l2_bvi.h \ + vnet/l2/l2_flood.h vnet/l2/l2_fib.h vnet/l2/l2_xcrw.h \ + vnet/l2/l2_classify.h vnet/srp/packet.h vnet/srp/srp.h \ + vnet/ppp/error.def vnet/ppp/ppp.h vnet/ppp/packet.h \ + vnet/hdlc/error.def vnet/hdlc/hdlc.h vnet/hdlc/packet.h \ + vnet/llc/llc.h vnet/snap/snap.h vnet/vxlan/vxlan.h \ + vnet/vxlan/vxlan_packet.h vnet/vxlan/vxlan_error.def \ + vnet/classify/vnet_classify.h vnet/classify/input_acl.h \ + vnet/ip/format.h vnet/ip/icmp46_packet.h vnet/ip/icmp6.h \ + vnet/ip/igmp_packet.h vnet/ip/ip.h vnet/ip/ip4.h \ + vnet/ip/ip4_mtrie.h vnet/ip/ip4_error.h vnet/ip/ip4_packet.h \ + vnet/ip/ip6.h vnet/ip/ip6_error.h vnet/ip/ip6_hop_by_hop.h \ + vnet/ip/ip6_hop_by_hop_packet.h vnet/ip/ip6_packet.h \ + vnet/ip/lookup.h vnet/ip/ip_packet.h vnet/ip/ports.def \ + vnet/ip/protocols.def vnet/ip/tcp.h vnet/ip/tcp_packet.h \ + vnet/ip/udp.h vnet/ip/udp_error.def vnet/ip/udp_packet.h \ + vnet/ipsec/ipsec.h vnet/ipsec/esp.h vnet/ipsec/ikev2.h \ + vnet/ipsec/ikev2_priv.h vnet/osi/osi.h vnet/map/map.h \ + vnet/map/sixrd.h vnet/gre/gre.h vnet/gre/packet.h \ + vnet/gre/error.def vnet/l2tp/l2tp.h vnet/l2tp/packet.h \ + vnet/mpls-gre/mpls.h vnet/mpls-gre/packet.h \ + vnet/mpls-gre/error.def vnet/nsh-gre/nsh_gre.h \ + vnet/nsh-gre/nsh_gre_packet.h vnet/nsh-gre/nsh_gre_error.def \ + vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h \ + vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h \ + vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def \ + vnet/lisp-gpe/lisp_gpe.h vnet/lisp-gpe/lisp_gpe_packet.h \ + vnet/lisp-gpe/lisp_gpe_error.def vnet/dhcp/client.h \ + vnet/dhcp/packet.h vnet/dhcp/proxy.h vnet/dhcp/proxy_error.def \ + vnet/sr/sr_packet.h vnet/sr/sr_error.def vnet/sr/sr.h \ + vnet/dhcpv6/packet.h vnet/dhcpv6/proxy.h \ + vnet/dhcpv6/proxy_error.def vnet/flow/flow_report.h \ + vnet/flow/ipfix_info_elements.h vnet/flow/ipfix_packet.h \ + vnet/flow/flow_report_sample.h vnet/dpdk_replication.h \ + vnet/lawful-intercept/lawful_intercept.h vnet/pg/pg.h \ + vnet/pg/edit.h vnet/devices/dpdk/dpdk.h \ + vnet/devices/dpdk/threads.h vnet/devices/virtio/vhost-user.h \ + vnet/devices/ssvm/ssvm_eth.h vnet/unix/pcap.h \ + vnet/unix/tuntap.h vnet/unix/tapcli.h vnet/plugin/plugin.h +HEADERS = $(nobase_include_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. 
This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +CSCOPE = cscope +AM_RECURSIVE_TARGETS = cscope +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +DIST_TARGETS = dist-gzip +distuninstallcheck_listfiles = find . -type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . -type f -print +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DPDK = @DPDK@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +VIRL = @VIRL@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ 
+mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign subdir-objects +AM_CFLAGS = -Wall @DPDK@ @VIRL@ + +######################################## +# Generic stuff +######################################## + +######################################## +# Policer infra +######################################## + +######################################## +# Layer 2 protocols go here +######################################## + +######################################## +# Layer 2 protocol: Ethernet +######################################## + +######################################## +# Layer 2 protocol: Ethernet bridging +######################################## + +######################################## +# Layer 2 protocol: SRP +######################################## + +######################################## +# Layer 2 protocol: PPP +######################################## + +######################################## +# Layer 2 protocol: HDLC +######################################## + +######################################## +# Layer 2 protocol: LLC +######################################## + +######################################## +# Layer 2 protocol: SNAP +######################################## + +######################################## +# Layer 2 / vxlan +######################################## + +######################################## +# Layer 2/3 "classify" +######################################## + +######################################## +# Layer 3 protocols go here +######################################## + +# vnet/ip/tcp.c \ +# vnet/ip/tcp_format.c \ +# vnet/ip/tcp_init.c \ +# vnet/ip/tcp_pg.c + +######################################## +# Layer 3 protocol: IP v4/v6 +######################################## + +######################################## +# Layer 3 protocol: osi +######################################## + +######################################## +# Layer 3 protocol: MAP +######################################## + +######################################## +# Tunnel protocol: gre +######################################## + +######################################## +# Tunnel protocol: l2tpv3 +######################################## + +######################################## +# Tunnel protocol: gre+mpls +######################################## + +######################################## +# Tunnel protocol: nsh-gre +######################################## + +######################################## +# Tunnel protocol: nsh-vxlan-gpe +######################################## + +######################################## +# Tunnel protocol: lisp-gpe +######################################## + +######################################## +# DHCP client +######################################## + +######################################## +# DHCP proxy +######################################## + +######################################## +# ipv6 segment routing +######################################## + +######################################## +# CGN +######################################### + +######################################## +# DHCPv6 proxy 
+######################################## + +######################################## +# IPFIX / netflow v10 +######################################## + +######################################## +# IPFIX sample code +######################################## + +######################################## +# lawful intercept +######################################## + +######################################## +# Packet generator +######################################## + +######################################## +# ssvm ethernet +######################################## + +######################################## +# Unix kernel related +######################################## + +# FIXME: vnet/unix/hgshm.c +libvnet_la_SOURCES = vnet/config.c vnet/interface.c \ + vnet/interface_cli.c vnet/interface_format.c \ + vnet/interface_output.c vnet/misc.c vnet/replication.c \ + vnet/rewrite.c vnet/policer/node_funcs.c \ + vnet/policer/policer.c vnet/policer/xlate.c \ + vnet/ethernet/arp.c vnet/ethernet/cli.c vnet/ethernet/format.c \ + vnet/ethernet/init.c vnet/ethernet/interface.c \ + vnet/ethernet/node.c vnet/ethernet/pg.c vnet/l2/feat_bitmap.c \ + vnet/l2/l2_bd.c vnet/l2/l2_bvi.c vnet/l2/l2_classify.c \ + vnet/l2/l2_efp_filter.c vnet/l2/l2_fib.c vnet/l2/l2_flood.c \ + vnet/l2/l2_fwd.c vnet/l2/l2_input_acl.c vnet/l2/l2_input.c \ + vnet/l2/l2_input_vtr.c vnet/l2/l2_learn.c \ + vnet/l2/l2_output_acl.c vnet/l2/l2_output.c vnet/l2/l2_patch.c \ + vnet/l2/l2_vtr.c vnet/l2/l2_xcrw.c vnet/srp/format.c \ + vnet/srp/interface.c vnet/srp/node.c vnet/srp/pg.c \ + vnet/ppp/node.c vnet/ppp/pg.c vnet/ppp/ppp.c vnet/hdlc/node.c \ + vnet/hdlc/pg.c vnet/hdlc/hdlc.c vnet/llc/llc.c vnet/llc/node.c \ + vnet/llc/pg.c vnet/snap/snap.c vnet/snap/node.c vnet/snap/pg.c \ + vnet/vxlan/vxlan.c vnet/vxlan/encap.c vnet/vxlan/decap.c \ + vnet/classify/vnet_classify.c vnet/classify/ip_classify.c \ + vnet/classify/input_acl.c vnet/classify/vnet_classify.h \ + vnet/ip/format.c vnet/ip/icmp4.c vnet/ip/icmp6.c \ + vnet/ip/ip46_cli.c vnet/ip/ip4_format.c vnet/ip/ip4_forward.c \ + vnet/ip/ip4_hop_by_hop.c vnet/ip/ip4_input.c \ + vnet/ip/ip4_mtrie.c vnet/ip/ip4_pg.c \ + vnet/ip/ip4_source_check.c vnet/ip/ip6_format.c \ + vnet/ip/ip6_forward.c vnet/ip/ip6_hop_by_hop.c \ + vnet/ip/ip6_input.c vnet/ip/ip6_neighbor.c vnet/ip/ip6_pg.c \ + vnet/ip/ip_checksum.c vnet/ip/ip.h vnet/ip/ip_init.c \ + vnet/ip/lookup.c vnet/ip/udp_format.c vnet/ip/udp_init.c \ + vnet/ip/udp_local.c vnet/ip/udp_pg.c vnet/ip/ip_input_acl.c \ + vnet/ip/ip_frag.c $(am__append_1) vnet/osi/node.c \ + vnet/osi/osi.c vnet/osi/pg.c vnet/map/map.c vnet/map/ip4_map.c \ + vnet/map/ip6_map.c vnet/map/sixrd.c vnet/map/ip4_sixrd.c \ + vnet/map/ip6_sixrd.c vnet/map/ip4_map_t.c vnet/map/ip6_map_t.c \ + vnet/gre/gre.c vnet/gre/node.c vnet/gre/interface.c \ + vnet/gre/pg.c vnet/l2tp/l2tp.c vnet/l2tp/encap.c \ + vnet/l2tp/decap.c vnet/l2tp/pg.c vnet/mpls-gre/mpls.c \ + vnet/mpls-gre/node.c vnet/mpls-gre/interface.c \ + vnet/mpls-gre/policy_encap.c vnet/mpls-gre/pg.c \ + vnet/nsh-gre/nsh_gre.c vnet/nsh-gre/encap.c \ + vnet/nsh-gre/decap.c vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c \ + vnet/nsh-vxlan-gpe/encap.c vnet/nsh-vxlan-gpe/decap.c \ + vnet/lisp-gpe/lisp_gpe.c vnet/lisp-gpe/encap.c \ + vnet/lisp-gpe/decap.c vnet/dhcp/client.c vnet/dhcp/client.h \ + vnet/dhcp/proxy_node.c vnet/dhcp/proxy.h vnet/sr/sr.c \ + vnet/vcgn/cnat_bulk_port.c vnet/vcgn/cnat_config.c \ + vnet/vcgn/cnat_db_scanner.c vnet/vcgn/cnat_db_v2.c \ + vnet/vcgn/cnat_debug_msg_handler.c \ + vnet/vcgn/cnat_cli_handler.c 
vnet/vcgn/cnat_global.c \ + vnet/vcgn/cnat_ipv4_udp_inside_input.c \ + vnet/vcgn/cnat_ipv4_udp_inside_input_exceptions.c \ + vnet/vcgn/cnat_ipv4_udp_outside_input.c \ + vnet/vcgn/cnat_ipv4_tcp_inside_input.c \ + vnet/vcgn/cnat_ipv4_tcp_inside_input_exceptions.c \ + vnet/vcgn/cnat_ipv4_tcp_outside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_query_inside_input_exception.c \ + vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c \ + vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c \ + vnet/vcgn/cnat_logging.c vnet/vcgn/cnat_ports.c \ + vnet/vcgn/cnat_util.c vnet/vcgn/cnat_show.c \ + vnet/vcgn/cnat_syslog.c vnet/vcgn/cnat_v4_functions.c \ + vnet/vcgn/index_list.c vnet/vcgn/spp_platform_trace_log.c \ + vnet/vcgn/vcgn_classify.c vnet/dhcpv6/proxy_node.c \ + vnet/flow/flow_report.c vnet/flow/flow_report_sample.c \ + vnet/lawful-intercept/lawful_intercept.c \ + vnet/lawful-intercept/node.c vnet/pg/cli.c vnet/pg/edit.c \ + vnet/pg/init.c vnet/pg/input.c vnet/pg/output.c \ + vnet/pg/stream.c $(am__append_3) vnet/devices/ssvm/ssvm_eth.c \ + vnet/devices/ssvm/node.c vnet/unix/gdb_funcs.c \ + vnet/unix/pcap.c vnet/unix/tapcli.c vnet/unix/tuntap.c + +######################################## +# Plugin client library +######################################## +libvnetplugin_la_SOURCES = vnet/plugin/p1.c +nobase_include_HEADERS = vnet/api_errno.h vnet/buffer.h vnet/config.h \ + vnet/global_funcs.h vnet/interface.h vnet/interface_funcs.h \ + vnet/l3_types.h vnet/pipeline.h vnet/replication.h \ + vnet/rewrite.h vnet/vnet.h vnet/policer/police.h \ + vnet/policer/xlate.h vnet/ethernet/arp_packet.h \ + vnet/ethernet/error.def vnet/ethernet/ethernet.h \ + vnet/ethernet/packet.h vnet/ethernet/types.def \ + vnet/l2/feat_bitmap.h vnet/l2/l2_input.h vnet/l2/l2_output.h \ + vnet/l2/l2_vtr.h vnet/l2/l2_input_vtr.h \ + vnet/l2/l2_efp_filter.h vnet/l2/l2_fwd.h vnet/l2/l2_bd.h \ + vnet/l2/l2_bvi.h vnet/l2/l2_flood.h vnet/l2/l2_fib.h \ + vnet/l2/l2_xcrw.h vnet/l2/l2_classify.h vnet/srp/packet.h \ + vnet/srp/srp.h vnet/ppp/error.def vnet/ppp/ppp.h \ + vnet/ppp/packet.h vnet/hdlc/error.def vnet/hdlc/hdlc.h \ + vnet/hdlc/packet.h vnet/llc/llc.h vnet/snap/snap.h \ + vnet/vxlan/vxlan.h vnet/vxlan/vxlan_packet.h \ + vnet/vxlan/vxlan_error.def vnet/classify/vnet_classify.h \ + vnet/classify/input_acl.h vnet/ip/format.h \ + vnet/ip/icmp46_packet.h vnet/ip/icmp6.h vnet/ip/igmp_packet.h \ + vnet/ip/ip.h vnet/ip/ip4.h vnet/ip/ip4_mtrie.h \ + vnet/ip/ip4_error.h vnet/ip/ip4_packet.h vnet/ip/ip6.h \ + vnet/ip/ip6_error.h vnet/ip/ip6_hop_by_hop.h \ + vnet/ip/ip6_hop_by_hop_packet.h vnet/ip/ip6_packet.h \ + vnet/ip/lookup.h vnet/ip/ip_packet.h vnet/ip/ports.def \ + vnet/ip/protocols.def vnet/ip/tcp.h vnet/ip/tcp_packet.h \ + vnet/ip/udp.h vnet/ip/udp_error.def vnet/ip/udp_packet.h \ + $(am__append_2) vnet/osi/osi.h vnet/map/map.h vnet/map/sixrd.h \ + vnet/gre/gre.h vnet/gre/packet.h vnet/gre/error.def \ + vnet/l2tp/l2tp.h vnet/l2tp/packet.h vnet/mpls-gre/mpls.h \ + vnet/mpls-gre/packet.h vnet/mpls-gre/error.def \ + vnet/nsh-gre/nsh_gre.h vnet/nsh-gre/nsh_gre_packet.h \ + vnet/nsh-gre/nsh_gre_error.def \ + vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h \ + vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h \ + vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def \ + vnet/lisp-gpe/lisp_gpe.h vnet/lisp-gpe/lisp_gpe_packet.h \ + vnet/lisp-gpe/lisp_gpe_error.def vnet/dhcp/client.h \ + vnet/dhcp/packet.h vnet/dhcp/proxy.h vnet/dhcp/proxy_error.def \ + vnet/sr/sr_packet.h vnet/sr/sr_error.def 
vnet/sr/sr.h \ + vnet/dhcpv6/packet.h vnet/dhcpv6/proxy.h \ + vnet/dhcpv6/proxy_error.def vnet/flow/flow_report.h \ + vnet/flow/ipfix_info_elements.h vnet/flow/ipfix_packet.h \ + vnet/flow/flow_report_sample.h vnet/dpdk_replication.h \ + vnet/lawful-intercept/lawful_intercept.h vnet/pg/pg.h \ + vnet/pg/edit.h $(am__append_4) vnet/devices/ssvm/ssvm_eth.h \ + vnet/unix/pcap.h vnet/unix/tuntap.h vnet/unix/tapcli.h \ + vnet/plugin/plugin.h +lib_LTLIBRARIES = libvnet.la libvnetplugin.la +vnet_unix_SOURCES = \ + example/main_stub.c + +vnet_unix_LDFLAGS = -static +dpdk_libs = $(am__append_5) +vnet_unix_LDADD = libvnet.la -l:libvlib_unix.a -l:libvlib.a -l:libvppinfra.a -lpthread -lvlibapi -lvlibmemory -lsvm -lm -ldl $(dpdk_libs) -lrt +pcap2pg_SOURCES = \ + vnet/unix/pcap2pg.c \ + vnet/unix/pcap.h + +pcap2pg_LDFLAGS = -static +pcap2pg_LDADD = libvnet.la -l:libvppinfra.a -lpthread -lm -ldl +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +vnet/$(am__dirstamp): + @$(MKDIR_P) vnet + @: > vnet/$(am__dirstamp) +vnet/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/$(DEPDIR) + @: > 
vnet/$(DEPDIR)/$(am__dirstamp) +vnet/config.lo: vnet/$(am__dirstamp) vnet/$(DEPDIR)/$(am__dirstamp) +vnet/interface.lo: vnet/$(am__dirstamp) vnet/$(DEPDIR)/$(am__dirstamp) +vnet/interface_cli.lo: vnet/$(am__dirstamp) \ + vnet/$(DEPDIR)/$(am__dirstamp) +vnet/interface_format.lo: vnet/$(am__dirstamp) \ + vnet/$(DEPDIR)/$(am__dirstamp) +vnet/interface_output.lo: vnet/$(am__dirstamp) \ + vnet/$(DEPDIR)/$(am__dirstamp) +vnet/misc.lo: vnet/$(am__dirstamp) vnet/$(DEPDIR)/$(am__dirstamp) +vnet/replication.lo: vnet/$(am__dirstamp) \ + vnet/$(DEPDIR)/$(am__dirstamp) +vnet/rewrite.lo: vnet/$(am__dirstamp) vnet/$(DEPDIR)/$(am__dirstamp) +vnet/policer/$(am__dirstamp): + @$(MKDIR_P) vnet/policer + @: > vnet/policer/$(am__dirstamp) +vnet/policer/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/policer/$(DEPDIR) + @: > vnet/policer/$(DEPDIR)/$(am__dirstamp) +vnet/policer/node_funcs.lo: vnet/policer/$(am__dirstamp) \ + vnet/policer/$(DEPDIR)/$(am__dirstamp) +vnet/policer/policer.lo: vnet/policer/$(am__dirstamp) \ + vnet/policer/$(DEPDIR)/$(am__dirstamp) +vnet/policer/xlate.lo: vnet/policer/$(am__dirstamp) \ + vnet/policer/$(DEPDIR)/$(am__dirstamp) +vnet/ethernet/$(am__dirstamp): + @$(MKDIR_P) vnet/ethernet + @: > vnet/ethernet/$(am__dirstamp) +vnet/ethernet/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/ethernet/$(DEPDIR) + @: > vnet/ethernet/$(DEPDIR)/$(am__dirstamp) +vnet/ethernet/arp.lo: vnet/ethernet/$(am__dirstamp) \ + vnet/ethernet/$(DEPDIR)/$(am__dirstamp) +vnet/ethernet/cli.lo: vnet/ethernet/$(am__dirstamp) \ + vnet/ethernet/$(DEPDIR)/$(am__dirstamp) +vnet/ethernet/format.lo: vnet/ethernet/$(am__dirstamp) \ + vnet/ethernet/$(DEPDIR)/$(am__dirstamp) +vnet/ethernet/init.lo: vnet/ethernet/$(am__dirstamp) \ + vnet/ethernet/$(DEPDIR)/$(am__dirstamp) +vnet/ethernet/interface.lo: vnet/ethernet/$(am__dirstamp) \ + vnet/ethernet/$(DEPDIR)/$(am__dirstamp) +vnet/ethernet/node.lo: vnet/ethernet/$(am__dirstamp) \ + vnet/ethernet/$(DEPDIR)/$(am__dirstamp) +vnet/ethernet/pg.lo: vnet/ethernet/$(am__dirstamp) \ + vnet/ethernet/$(DEPDIR)/$(am__dirstamp) +vnet/l2/$(am__dirstamp): + @$(MKDIR_P) vnet/l2 + @: > vnet/l2/$(am__dirstamp) +vnet/l2/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/l2/$(DEPDIR) + @: > vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/feat_bitmap.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_bd.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_bvi.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_classify.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_efp_filter.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_fib.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_flood.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_fwd.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_input_acl.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_input.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_input_vtr.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_learn.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_output_acl.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_output.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_patch.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_vtr.lo: 
vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/l2/l2_xcrw.lo: vnet/l2/$(am__dirstamp) \ + vnet/l2/$(DEPDIR)/$(am__dirstamp) +vnet/srp/$(am__dirstamp): + @$(MKDIR_P) vnet/srp + @: > vnet/srp/$(am__dirstamp) +vnet/srp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/srp/$(DEPDIR) + @: > vnet/srp/$(DEPDIR)/$(am__dirstamp) +vnet/srp/format.lo: vnet/srp/$(am__dirstamp) \ + vnet/srp/$(DEPDIR)/$(am__dirstamp) +vnet/srp/interface.lo: vnet/srp/$(am__dirstamp) \ + vnet/srp/$(DEPDIR)/$(am__dirstamp) +vnet/srp/node.lo: vnet/srp/$(am__dirstamp) \ + vnet/srp/$(DEPDIR)/$(am__dirstamp) +vnet/srp/pg.lo: vnet/srp/$(am__dirstamp) \ + vnet/srp/$(DEPDIR)/$(am__dirstamp) +vnet/ppp/$(am__dirstamp): + @$(MKDIR_P) vnet/ppp + @: > vnet/ppp/$(am__dirstamp) +vnet/ppp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/ppp/$(DEPDIR) + @: > vnet/ppp/$(DEPDIR)/$(am__dirstamp) +vnet/ppp/node.lo: vnet/ppp/$(am__dirstamp) \ + vnet/ppp/$(DEPDIR)/$(am__dirstamp) +vnet/ppp/pg.lo: vnet/ppp/$(am__dirstamp) \ + vnet/ppp/$(DEPDIR)/$(am__dirstamp) +vnet/ppp/ppp.lo: vnet/ppp/$(am__dirstamp) \ + vnet/ppp/$(DEPDIR)/$(am__dirstamp) +vnet/hdlc/$(am__dirstamp): + @$(MKDIR_P) vnet/hdlc + @: > vnet/hdlc/$(am__dirstamp) +vnet/hdlc/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/hdlc/$(DEPDIR) + @: > vnet/hdlc/$(DEPDIR)/$(am__dirstamp) +vnet/hdlc/node.lo: vnet/hdlc/$(am__dirstamp) \ + vnet/hdlc/$(DEPDIR)/$(am__dirstamp) +vnet/hdlc/pg.lo: vnet/hdlc/$(am__dirstamp) \ + vnet/hdlc/$(DEPDIR)/$(am__dirstamp) +vnet/hdlc/hdlc.lo: vnet/hdlc/$(am__dirstamp) \ + vnet/hdlc/$(DEPDIR)/$(am__dirstamp) +vnet/llc/$(am__dirstamp): + @$(MKDIR_P) vnet/llc + @: > vnet/llc/$(am__dirstamp) +vnet/llc/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/llc/$(DEPDIR) + @: > vnet/llc/$(DEPDIR)/$(am__dirstamp) +vnet/llc/llc.lo: vnet/llc/$(am__dirstamp) \ + vnet/llc/$(DEPDIR)/$(am__dirstamp) +vnet/llc/node.lo: vnet/llc/$(am__dirstamp) \ + vnet/llc/$(DEPDIR)/$(am__dirstamp) +vnet/llc/pg.lo: vnet/llc/$(am__dirstamp) \ + vnet/llc/$(DEPDIR)/$(am__dirstamp) +vnet/snap/$(am__dirstamp): + @$(MKDIR_P) vnet/snap + @: > vnet/snap/$(am__dirstamp) +vnet/snap/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/snap/$(DEPDIR) + @: > vnet/snap/$(DEPDIR)/$(am__dirstamp) +vnet/snap/snap.lo: vnet/snap/$(am__dirstamp) \ + vnet/snap/$(DEPDIR)/$(am__dirstamp) +vnet/snap/node.lo: vnet/snap/$(am__dirstamp) \ + vnet/snap/$(DEPDIR)/$(am__dirstamp) +vnet/snap/pg.lo: vnet/snap/$(am__dirstamp) \ + vnet/snap/$(DEPDIR)/$(am__dirstamp) +vnet/vxlan/$(am__dirstamp): + @$(MKDIR_P) vnet/vxlan + @: > vnet/vxlan/$(am__dirstamp) +vnet/vxlan/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/vxlan/$(DEPDIR) + @: > vnet/vxlan/$(DEPDIR)/$(am__dirstamp) +vnet/vxlan/vxlan.lo: vnet/vxlan/$(am__dirstamp) \ + vnet/vxlan/$(DEPDIR)/$(am__dirstamp) +vnet/vxlan/encap.lo: vnet/vxlan/$(am__dirstamp) \ + vnet/vxlan/$(DEPDIR)/$(am__dirstamp) +vnet/vxlan/decap.lo: vnet/vxlan/$(am__dirstamp) \ + vnet/vxlan/$(DEPDIR)/$(am__dirstamp) +vnet/classify/$(am__dirstamp): + @$(MKDIR_P) vnet/classify + @: > vnet/classify/$(am__dirstamp) +vnet/classify/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/classify/$(DEPDIR) + @: > vnet/classify/$(DEPDIR)/$(am__dirstamp) +vnet/classify/vnet_classify.lo: vnet/classify/$(am__dirstamp) \ + vnet/classify/$(DEPDIR)/$(am__dirstamp) +vnet/classify/ip_classify.lo: vnet/classify/$(am__dirstamp) \ + vnet/classify/$(DEPDIR)/$(am__dirstamp) +vnet/classify/input_acl.lo: vnet/classify/$(am__dirstamp) \ + vnet/classify/$(DEPDIR)/$(am__dirstamp) +vnet/ip/$(am__dirstamp): + @$(MKDIR_P) vnet/ip + @: > 
vnet/ip/$(am__dirstamp) +vnet/ip/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/ip/$(DEPDIR) + @: > vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/format.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/icmp4.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/icmp6.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip46_cli.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip4_format.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip4_forward.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip4_hop_by_hop.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip4_input.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip4_mtrie.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip4_pg.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip4_source_check.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip6_format.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip6_forward.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip6_hop_by_hop.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip6_input.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip6_neighbor.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip6_pg.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip_checksum.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip_init.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/lookup.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/udp_format.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/udp_init.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/udp_local.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/udp_pg.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip_input_acl.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ip/ip_frag.lo: vnet/ip/$(am__dirstamp) \ + vnet/ip/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/$(am__dirstamp): + @$(MKDIR_P) vnet/ipsec + @: > vnet/ipsec/$(am__dirstamp) +vnet/ipsec/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/ipsec/$(DEPDIR) + @: > vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ipsec.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ipsec_cli.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ipsec_format.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ipsec_output.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ipsec_input.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ipsec_if.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ipsec_if_in.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ipsec_if_out.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/esp_encrypt.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/esp_decrypt.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ikev2.lo: vnet/ipsec/$(am__dirstamp) \ 
+ vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ikev2_crypto.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ikev2_cli.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ikev2_payload.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/ipsec/ikev2_format.lo: vnet/ipsec/$(am__dirstamp) \ + vnet/ipsec/$(DEPDIR)/$(am__dirstamp) +vnet/osi/$(am__dirstamp): + @$(MKDIR_P) vnet/osi + @: > vnet/osi/$(am__dirstamp) +vnet/osi/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/osi/$(DEPDIR) + @: > vnet/osi/$(DEPDIR)/$(am__dirstamp) +vnet/osi/node.lo: vnet/osi/$(am__dirstamp) \ + vnet/osi/$(DEPDIR)/$(am__dirstamp) +vnet/osi/osi.lo: vnet/osi/$(am__dirstamp) \ + vnet/osi/$(DEPDIR)/$(am__dirstamp) +vnet/osi/pg.lo: vnet/osi/$(am__dirstamp) \ + vnet/osi/$(DEPDIR)/$(am__dirstamp) +vnet/map/$(am__dirstamp): + @$(MKDIR_P) vnet/map + @: > vnet/map/$(am__dirstamp) +vnet/map/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/map/$(DEPDIR) + @: > vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/map/map.lo: vnet/map/$(am__dirstamp) \ + vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/map/ip4_map.lo: vnet/map/$(am__dirstamp) \ + vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/map/ip6_map.lo: vnet/map/$(am__dirstamp) \ + vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/map/sixrd.lo: vnet/map/$(am__dirstamp) \ + vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/map/ip4_sixrd.lo: vnet/map/$(am__dirstamp) \ + vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/map/ip6_sixrd.lo: vnet/map/$(am__dirstamp) \ + vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/map/ip4_map_t.lo: vnet/map/$(am__dirstamp) \ + vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/map/ip6_map_t.lo: vnet/map/$(am__dirstamp) \ + vnet/map/$(DEPDIR)/$(am__dirstamp) +vnet/gre/$(am__dirstamp): + @$(MKDIR_P) vnet/gre + @: > vnet/gre/$(am__dirstamp) +vnet/gre/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/gre/$(DEPDIR) + @: > vnet/gre/$(DEPDIR)/$(am__dirstamp) +vnet/gre/gre.lo: vnet/gre/$(am__dirstamp) \ + vnet/gre/$(DEPDIR)/$(am__dirstamp) +vnet/gre/node.lo: vnet/gre/$(am__dirstamp) \ + vnet/gre/$(DEPDIR)/$(am__dirstamp) +vnet/gre/interface.lo: vnet/gre/$(am__dirstamp) \ + vnet/gre/$(DEPDIR)/$(am__dirstamp) +vnet/gre/pg.lo: vnet/gre/$(am__dirstamp) \ + vnet/gre/$(DEPDIR)/$(am__dirstamp) +vnet/l2tp/$(am__dirstamp): + @$(MKDIR_P) vnet/l2tp + @: > vnet/l2tp/$(am__dirstamp) +vnet/l2tp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/l2tp/$(DEPDIR) + @: > vnet/l2tp/$(DEPDIR)/$(am__dirstamp) +vnet/l2tp/l2tp.lo: vnet/l2tp/$(am__dirstamp) \ + vnet/l2tp/$(DEPDIR)/$(am__dirstamp) +vnet/l2tp/encap.lo: vnet/l2tp/$(am__dirstamp) \ + vnet/l2tp/$(DEPDIR)/$(am__dirstamp) +vnet/l2tp/decap.lo: vnet/l2tp/$(am__dirstamp) \ + vnet/l2tp/$(DEPDIR)/$(am__dirstamp) +vnet/l2tp/pg.lo: vnet/l2tp/$(am__dirstamp) \ + vnet/l2tp/$(DEPDIR)/$(am__dirstamp) +vnet/mpls-gre/$(am__dirstamp): + @$(MKDIR_P) vnet/mpls-gre + @: > vnet/mpls-gre/$(am__dirstamp) +vnet/mpls-gre/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/mpls-gre/$(DEPDIR) + @: > vnet/mpls-gre/$(DEPDIR)/$(am__dirstamp) +vnet/mpls-gre/mpls.lo: vnet/mpls-gre/$(am__dirstamp) \ + vnet/mpls-gre/$(DEPDIR)/$(am__dirstamp) +vnet/mpls-gre/node.lo: vnet/mpls-gre/$(am__dirstamp) \ + vnet/mpls-gre/$(DEPDIR)/$(am__dirstamp) +vnet/mpls-gre/interface.lo: vnet/mpls-gre/$(am__dirstamp) \ + vnet/mpls-gre/$(DEPDIR)/$(am__dirstamp) +vnet/mpls-gre/policy_encap.lo: vnet/mpls-gre/$(am__dirstamp) \ + vnet/mpls-gre/$(DEPDIR)/$(am__dirstamp) +vnet/mpls-gre/pg.lo: vnet/mpls-gre/$(am__dirstamp) \ + vnet/mpls-gre/$(DEPDIR)/$(am__dirstamp) 
+vnet/nsh-gre/$(am__dirstamp): + @$(MKDIR_P) vnet/nsh-gre + @: > vnet/nsh-gre/$(am__dirstamp) +vnet/nsh-gre/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/nsh-gre/$(DEPDIR) + @: > vnet/nsh-gre/$(DEPDIR)/$(am__dirstamp) +vnet/nsh-gre/nsh_gre.lo: vnet/nsh-gre/$(am__dirstamp) \ + vnet/nsh-gre/$(DEPDIR)/$(am__dirstamp) +vnet/nsh-gre/encap.lo: vnet/nsh-gre/$(am__dirstamp) \ + vnet/nsh-gre/$(DEPDIR)/$(am__dirstamp) +vnet/nsh-gre/decap.lo: vnet/nsh-gre/$(am__dirstamp) \ + vnet/nsh-gre/$(DEPDIR)/$(am__dirstamp) +vnet/nsh-vxlan-gpe/$(am__dirstamp): + @$(MKDIR_P) vnet/nsh-vxlan-gpe + @: > vnet/nsh-vxlan-gpe/$(am__dirstamp) +vnet/nsh-vxlan-gpe/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/nsh-vxlan-gpe/$(DEPDIR) + @: > vnet/nsh-vxlan-gpe/$(DEPDIR)/$(am__dirstamp) +vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.lo: \ + vnet/nsh-vxlan-gpe/$(am__dirstamp) \ + vnet/nsh-vxlan-gpe/$(DEPDIR)/$(am__dirstamp) +vnet/nsh-vxlan-gpe/encap.lo: vnet/nsh-vxlan-gpe/$(am__dirstamp) \ + vnet/nsh-vxlan-gpe/$(DEPDIR)/$(am__dirstamp) +vnet/nsh-vxlan-gpe/decap.lo: vnet/nsh-vxlan-gpe/$(am__dirstamp) \ + vnet/nsh-vxlan-gpe/$(DEPDIR)/$(am__dirstamp) +vnet/lisp-gpe/$(am__dirstamp): + @$(MKDIR_P) vnet/lisp-gpe + @: > vnet/lisp-gpe/$(am__dirstamp) +vnet/lisp-gpe/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/lisp-gpe/$(DEPDIR) + @: > vnet/lisp-gpe/$(DEPDIR)/$(am__dirstamp) +vnet/lisp-gpe/lisp_gpe.lo: vnet/lisp-gpe/$(am__dirstamp) \ + vnet/lisp-gpe/$(DEPDIR)/$(am__dirstamp) +vnet/lisp-gpe/encap.lo: vnet/lisp-gpe/$(am__dirstamp) \ + vnet/lisp-gpe/$(DEPDIR)/$(am__dirstamp) +vnet/lisp-gpe/decap.lo: vnet/lisp-gpe/$(am__dirstamp) \ + vnet/lisp-gpe/$(DEPDIR)/$(am__dirstamp) +vnet/dhcp/$(am__dirstamp): + @$(MKDIR_P) vnet/dhcp + @: > vnet/dhcp/$(am__dirstamp) +vnet/dhcp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/dhcp/$(DEPDIR) + @: > vnet/dhcp/$(DEPDIR)/$(am__dirstamp) +vnet/dhcp/client.lo: vnet/dhcp/$(am__dirstamp) \ + vnet/dhcp/$(DEPDIR)/$(am__dirstamp) +vnet/dhcp/proxy_node.lo: vnet/dhcp/$(am__dirstamp) \ + vnet/dhcp/$(DEPDIR)/$(am__dirstamp) +vnet/sr/$(am__dirstamp): + @$(MKDIR_P) vnet/sr + @: > vnet/sr/$(am__dirstamp) +vnet/sr/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/sr/$(DEPDIR) + @: > vnet/sr/$(DEPDIR)/$(am__dirstamp) +vnet/sr/sr.lo: vnet/sr/$(am__dirstamp) \ + vnet/sr/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/$(am__dirstamp): + @$(MKDIR_P) vnet/vcgn + @: > vnet/vcgn/$(am__dirstamp) +vnet/vcgn/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/vcgn/$(DEPDIR) + @: > vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_bulk_port.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_config.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_db_scanner.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_db_v2.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_debug_msg_handler.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_cli_handler.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_global.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_udp_inside_input.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_udp_inside_input_exceptions.lo: \ + vnet/vcgn/$(am__dirstamp) vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_udp_outside_input.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_tcp_inside_input.lo: 
vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_tcp_inside_input_exceptions.lo: \ + vnet/vcgn/$(am__dirstamp) vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_tcp_outside_input.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_icmp_query_inside_input.lo: \ + vnet/vcgn/$(am__dirstamp) vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_icmp_query_inside_input_exception.lo: \ + vnet/vcgn/$(am__dirstamp) vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_icmp_query_outside_input.lo: \ + vnet/vcgn/$(am__dirstamp) vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_icmp_error_inside_input.lo: \ + vnet/vcgn/$(am__dirstamp) vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ipv4_icmp_error_outside_input.lo: \ + vnet/vcgn/$(am__dirstamp) vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_logging.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_ports.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_util.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_show.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_syslog.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/cnat_v4_functions.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/index_list.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/spp_platform_trace_log.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/vcgn/vcgn_classify.lo: vnet/vcgn/$(am__dirstamp) \ + vnet/vcgn/$(DEPDIR)/$(am__dirstamp) +vnet/dhcpv6/$(am__dirstamp): + @$(MKDIR_P) vnet/dhcpv6 + @: > vnet/dhcpv6/$(am__dirstamp) +vnet/dhcpv6/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/dhcpv6/$(DEPDIR) + @: > vnet/dhcpv6/$(DEPDIR)/$(am__dirstamp) +vnet/dhcpv6/proxy_node.lo: vnet/dhcpv6/$(am__dirstamp) \ + vnet/dhcpv6/$(DEPDIR)/$(am__dirstamp) +vnet/flow/$(am__dirstamp): + @$(MKDIR_P) vnet/flow + @: > vnet/flow/$(am__dirstamp) +vnet/flow/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/flow/$(DEPDIR) + @: > vnet/flow/$(DEPDIR)/$(am__dirstamp) +vnet/flow/flow_report.lo: vnet/flow/$(am__dirstamp) \ + vnet/flow/$(DEPDIR)/$(am__dirstamp) +vnet/flow/flow_report_sample.lo: vnet/flow/$(am__dirstamp) \ + vnet/flow/$(DEPDIR)/$(am__dirstamp) +vnet/lawful-intercept/$(am__dirstamp): + @$(MKDIR_P) vnet/lawful-intercept + @: > vnet/lawful-intercept/$(am__dirstamp) +vnet/lawful-intercept/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/lawful-intercept/$(DEPDIR) + @: > vnet/lawful-intercept/$(DEPDIR)/$(am__dirstamp) +vnet/lawful-intercept/lawful_intercept.lo: \ + vnet/lawful-intercept/$(am__dirstamp) \ + vnet/lawful-intercept/$(DEPDIR)/$(am__dirstamp) +vnet/lawful-intercept/node.lo: vnet/lawful-intercept/$(am__dirstamp) \ + vnet/lawful-intercept/$(DEPDIR)/$(am__dirstamp) +vnet/pg/$(am__dirstamp): + @$(MKDIR_P) vnet/pg + @: > vnet/pg/$(am__dirstamp) +vnet/pg/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/pg/$(DEPDIR) + @: > vnet/pg/$(DEPDIR)/$(am__dirstamp) +vnet/pg/cli.lo: vnet/pg/$(am__dirstamp) \ + vnet/pg/$(DEPDIR)/$(am__dirstamp) +vnet/pg/edit.lo: vnet/pg/$(am__dirstamp) \ + vnet/pg/$(DEPDIR)/$(am__dirstamp) +vnet/pg/init.lo: vnet/pg/$(am__dirstamp) \ + vnet/pg/$(DEPDIR)/$(am__dirstamp) +vnet/pg/input.lo: vnet/pg/$(am__dirstamp) \ + vnet/pg/$(DEPDIR)/$(am__dirstamp) +vnet/pg/output.lo: vnet/pg/$(am__dirstamp) \ + 
vnet/pg/$(DEPDIR)/$(am__dirstamp) +vnet/pg/stream.lo: vnet/pg/$(am__dirstamp) \ + vnet/pg/$(DEPDIR)/$(am__dirstamp) +vnet/devices/dpdk/$(am__dirstamp): + @$(MKDIR_P) vnet/devices/dpdk + @: > vnet/devices/dpdk/$(am__dirstamp) +vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/devices/dpdk/$(DEPDIR) + @: > vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp) +vnet/devices/dpdk/device.lo: vnet/devices/dpdk/$(am__dirstamp) \ + vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp) +vnet/devices/dpdk/init.lo: vnet/devices/dpdk/$(am__dirstamp) \ + vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp) +vnet/devices/dpdk/node.lo: vnet/devices/dpdk/$(am__dirstamp) \ + vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp) +vnet/devices/dpdk/threads.lo: vnet/devices/dpdk/$(am__dirstamp) \ + vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp) +vnet/devices/dpdk/vhost_user.lo: vnet/devices/dpdk/$(am__dirstamp) \ + vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp) +vnet/devices/dpdk/cli.lo: vnet/devices/dpdk/$(am__dirstamp) \ + vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp) +vnet/devices/virtio/$(am__dirstamp): + @$(MKDIR_P) vnet/devices/virtio + @: > vnet/devices/virtio/$(am__dirstamp) +vnet/devices/virtio/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/devices/virtio/$(DEPDIR) + @: > vnet/devices/virtio/$(DEPDIR)/$(am__dirstamp) +vnet/devices/virtio/vhost-user.lo: \ + vnet/devices/virtio/$(am__dirstamp) \ + vnet/devices/virtio/$(DEPDIR)/$(am__dirstamp) +vnet/devices/ssvm/$(am__dirstamp): + @$(MKDIR_P) vnet/devices/ssvm + @: > vnet/devices/ssvm/$(am__dirstamp) +vnet/devices/ssvm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/devices/ssvm/$(DEPDIR) + @: > vnet/devices/ssvm/$(DEPDIR)/$(am__dirstamp) +vnet/devices/ssvm/ssvm_eth.lo: vnet/devices/ssvm/$(am__dirstamp) \ + vnet/devices/ssvm/$(DEPDIR)/$(am__dirstamp) +vnet/devices/ssvm/node.lo: vnet/devices/ssvm/$(am__dirstamp) \ + vnet/devices/ssvm/$(DEPDIR)/$(am__dirstamp) +vnet/unix/$(am__dirstamp): + @$(MKDIR_P) vnet/unix + @: > vnet/unix/$(am__dirstamp) +vnet/unix/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/unix/$(DEPDIR) + @: > vnet/unix/$(DEPDIR)/$(am__dirstamp) +vnet/unix/gdb_funcs.lo: vnet/unix/$(am__dirstamp) \ + vnet/unix/$(DEPDIR)/$(am__dirstamp) +vnet/unix/pcap.lo: vnet/unix/$(am__dirstamp) \ + vnet/unix/$(DEPDIR)/$(am__dirstamp) +vnet/unix/tapcli.lo: vnet/unix/$(am__dirstamp) \ + vnet/unix/$(DEPDIR)/$(am__dirstamp) +vnet/unix/tuntap.lo: vnet/unix/$(am__dirstamp) \ + vnet/unix/$(DEPDIR)/$(am__dirstamp) + +libvnet.la: $(libvnet_la_OBJECTS) $(libvnet_la_DEPENDENCIES) $(EXTRA_libvnet_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) -rpath $(libdir) $(libvnet_la_OBJECTS) $(libvnet_la_LIBADD) $(LIBS) +vnet/plugin/$(am__dirstamp): + @$(MKDIR_P) vnet/plugin + @: > vnet/plugin/$(am__dirstamp) +vnet/plugin/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vnet/plugin/$(DEPDIR) + @: > vnet/plugin/$(DEPDIR)/$(am__dirstamp) +vnet/plugin/p1.lo: vnet/plugin/$(am__dirstamp) \ + vnet/plugin/$(DEPDIR)/$(am__dirstamp) + +libvnetplugin.la: $(libvnetplugin_la_OBJECTS) $(libvnetplugin_la_DEPENDENCIES) $(EXTRA_libvnetplugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) -rpath $(libdir) $(libvnetplugin_la_OBJECTS) $(libvnetplugin_la_LIBADD) $(LIBS) + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +vnet/unix/pcap2pg.$(OBJEXT): vnet/unix/$(am__dirstamp) \ + vnet/unix/$(DEPDIR)/$(am__dirstamp) + 
+pcap2pg$(EXEEXT): $(pcap2pg_OBJECTS) $(pcap2pg_DEPENDENCIES) $(EXTRA_pcap2pg_DEPENDENCIES) + @rm -f pcap2pg$(EXEEXT) + $(AM_V_CCLD)$(pcap2pg_LINK) $(pcap2pg_OBJECTS) $(pcap2pg_LDADD) $(LIBS) +example/$(am__dirstamp): + @$(MKDIR_P) example + @: > example/$(am__dirstamp) +example/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) example/$(DEPDIR) + @: > example/$(DEPDIR)/$(am__dirstamp) +example/main_stub.$(OBJEXT): example/$(am__dirstamp) \ + example/$(DEPDIR)/$(am__dirstamp) + +vnet_unix$(EXEEXT): $(vnet_unix_OBJECTS) $(vnet_unix_DEPENDENCIES) $(EXTRA_vnet_unix_DEPENDENCIES) + @rm -f vnet_unix$(EXEEXT) + $(AM_V_CCLD)$(vnet_unix_LINK) $(vnet_unix_OBJECTS) $(vnet_unix_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f example/*.$(OBJEXT) + -rm -f vnet/*.$(OBJEXT) + -rm -f vnet/*.lo + -rm -f vnet/classify/*.$(OBJEXT) + -rm -f vnet/classify/*.lo + -rm -f vnet/devices/dpdk/*.$(OBJEXT) + -rm -f vnet/devices/dpdk/*.lo + -rm -f vnet/devices/ssvm/*.$(OBJEXT) + -rm -f vnet/devices/ssvm/*.lo + -rm -f vnet/devices/virtio/*.$(OBJEXT) + -rm -f vnet/devices/virtio/*.lo + -rm -f vnet/dhcp/*.$(OBJEXT) + -rm -f vnet/dhcp/*.lo + -rm -f vnet/dhcpv6/*.$(OBJEXT) + -rm -f vnet/dhcpv6/*.lo + -rm -f vnet/ethernet/*.$(OBJEXT) + -rm -f vnet/ethernet/*.lo + -rm -f vnet/flow/*.$(OBJEXT) + -rm -f vnet/flow/*.lo + -rm -f vnet/gre/*.$(OBJEXT) + -rm -f vnet/gre/*.lo + -rm -f vnet/hdlc/*.$(OBJEXT) + -rm -f vnet/hdlc/*.lo + -rm -f vnet/ip/*.$(OBJEXT) + -rm -f vnet/ip/*.lo + -rm -f vnet/ipsec/*.$(OBJEXT) + -rm -f vnet/ipsec/*.lo + -rm -f vnet/l2/*.$(OBJEXT) + -rm -f vnet/l2/*.lo + -rm -f vnet/l2tp/*.$(OBJEXT) + -rm -f vnet/l2tp/*.lo + -rm -f vnet/lawful-intercept/*.$(OBJEXT) + -rm -f vnet/lawful-intercept/*.lo + -rm -f vnet/lisp-gpe/*.$(OBJEXT) + -rm -f vnet/lisp-gpe/*.lo + -rm -f vnet/llc/*.$(OBJEXT) + -rm -f vnet/llc/*.lo + -rm -f vnet/map/*.$(OBJEXT) + -rm -f vnet/map/*.lo + -rm -f vnet/mpls-gre/*.$(OBJEXT) + -rm -f vnet/mpls-gre/*.lo + -rm -f vnet/nsh-gre/*.$(OBJEXT) + -rm -f vnet/nsh-gre/*.lo + -rm -f vnet/nsh-vxlan-gpe/*.$(OBJEXT) + -rm -f vnet/nsh-vxlan-gpe/*.lo + -rm -f vnet/osi/*.$(OBJEXT) + -rm -f vnet/osi/*.lo + -rm -f vnet/pg/*.$(OBJEXT) + -rm -f vnet/pg/*.lo + -rm -f vnet/plugin/*.$(OBJEXT) + -rm -f vnet/plugin/*.lo + -rm -f vnet/policer/*.$(OBJEXT) + -rm -f vnet/policer/*.lo + -rm -f vnet/ppp/*.$(OBJEXT) + -rm -f vnet/ppp/*.lo + -rm -f vnet/snap/*.$(OBJEXT) + -rm -f vnet/snap/*.lo + -rm -f vnet/sr/*.$(OBJEXT) + -rm -f vnet/sr/*.lo + -rm -f vnet/srp/*.$(OBJEXT) + -rm -f vnet/srp/*.lo + -rm -f vnet/unix/*.$(OBJEXT) + -rm -f vnet/unix/*.lo + -rm -f vnet/vcgn/*.$(OBJEXT) + -rm -f vnet/vcgn/*.lo + -rm -f vnet/vxlan/*.$(OBJEXT) + -rm -f vnet/vxlan/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@example/$(DEPDIR)/main_stub.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/$(DEPDIR)/config.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/$(DEPDIR)/interface.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/$(DEPDIR)/interface_cli.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/$(DEPDIR)/interface_format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/$(DEPDIR)/interface_output.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/$(DEPDIR)/misc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/$(DEPDIR)/replication.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/$(DEPDIR)/rewrite.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/classify/$(DEPDIR)/input_acl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@vnet/classify/$(DEPDIR)/ip_classify.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/classify/$(DEPDIR)/vnet_classify.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/dpdk/$(DEPDIR)/cli.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/dpdk/$(DEPDIR)/device.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/dpdk/$(DEPDIR)/init.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/dpdk/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/dpdk/$(DEPDIR)/threads.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/dpdk/$(DEPDIR)/vhost_user.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/ssvm/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/ssvm/$(DEPDIR)/ssvm_eth.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/devices/virtio/$(DEPDIR)/vhost-user.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/dhcp/$(DEPDIR)/client.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/dhcp/$(DEPDIR)/proxy_node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/dhcpv6/$(DEPDIR)/proxy_node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ethernet/$(DEPDIR)/arp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ethernet/$(DEPDIR)/cli.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ethernet/$(DEPDIR)/format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ethernet/$(DEPDIR)/init.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ethernet/$(DEPDIR)/interface.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ethernet/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ethernet/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/flow/$(DEPDIR)/flow_report.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/flow/$(DEPDIR)/flow_report_sample.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/gre/$(DEPDIR)/gre.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/gre/$(DEPDIR)/interface.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/gre/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/gre/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/hdlc/$(DEPDIR)/hdlc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/hdlc/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/hdlc/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/icmp4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/icmp6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip46_cli.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip4_format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip4_forward.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip4_hop_by_hop.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip4_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip4_mtrie.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip4_pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip4_source_check.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip6_format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip6_forward.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip6_hop_by_hop.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip6_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip6_neighbor.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip6_pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip_checksum.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip_frag.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip_init.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/ip_input_acl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/lookup.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/udp_format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/udp_init.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/udp_local.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ip/$(DEPDIR)/udp_pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/esp_decrypt.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/esp_encrypt.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ikev2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ikev2_cli.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ikev2_crypto.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ikev2_format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ikev2_payload.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ipsec.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ipsec_cli.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ipsec_format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ipsec_if.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ipsec_if_in.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ipsec_if_out.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ipsec_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ipsec/$(DEPDIR)/ipsec_output.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/feat_bitmap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_bd.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_bvi.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_classify.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_efp_filter.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_fib.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_flood.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_fwd.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_input_acl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_input_vtr.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_learn.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_output.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_output_acl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_patch.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_vtr.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2/$(DEPDIR)/l2_xcrw.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2tp/$(DEPDIR)/decap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2tp/$(DEPDIR)/encap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2tp/$(DEPDIR)/l2tp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/l2tp/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/lawful-intercept/$(DEPDIR)/lawful_intercept.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/lawful-intercept/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/lisp-gpe/$(DEPDIR)/decap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/lisp-gpe/$(DEPDIR)/encap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/lisp-gpe/$(DEPDIR)/lisp_gpe.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/llc/$(DEPDIR)/llc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/llc/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/llc/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/map/$(DEPDIR)/ip4_map.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/map/$(DEPDIR)/ip4_map_t.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/map/$(DEPDIR)/ip4_sixrd.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/map/$(DEPDIR)/ip6_map.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/map/$(DEPDIR)/ip6_map_t.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/map/$(DEPDIR)/ip6_sixrd.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/map/$(DEPDIR)/map.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/map/$(DEPDIR)/sixrd.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/mpls-gre/$(DEPDIR)/interface.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/mpls-gre/$(DEPDIR)/mpls.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/mpls-gre/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/mpls-gre/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/mpls-gre/$(DEPDIR)/policy_encap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/nsh-gre/$(DEPDIR)/decap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/nsh-gre/$(DEPDIR)/encap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/nsh-gre/$(DEPDIR)/nsh_gre.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/nsh-vxlan-gpe/$(DEPDIR)/decap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/nsh-vxlan-gpe/$(DEPDIR)/encap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/nsh-vxlan-gpe/$(DEPDIR)/nsh_vxlan_gpe.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/osi/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/osi/$(DEPDIR)/osi.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/osi/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/pg/$(DEPDIR)/cli.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/pg/$(DEPDIR)/edit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/pg/$(DEPDIR)/init.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/pg/$(DEPDIR)/input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/pg/$(DEPDIR)/output.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/pg/$(DEPDIR)/stream.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/plugin/$(DEPDIR)/p1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@vnet/policer/$(DEPDIR)/node_funcs.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/policer/$(DEPDIR)/policer.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/policer/$(DEPDIR)/xlate.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ppp/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ppp/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/ppp/$(DEPDIR)/ppp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/snap/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/snap/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/snap/$(DEPDIR)/snap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/sr/$(DEPDIR)/sr.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/srp/$(DEPDIR)/format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/srp/$(DEPDIR)/interface.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/srp/$(DEPDIR)/node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/srp/$(DEPDIR)/pg.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/unix/$(DEPDIR)/gdb_funcs.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/unix/$(DEPDIR)/pcap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/unix/$(DEPDIR)/pcap2pg.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/unix/$(DEPDIR)/tapcli.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/unix/$(DEPDIR)/tuntap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_bulk_port.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_cli_handler.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_config.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_db_scanner.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_db_v2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_debug_msg_handler.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_global.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_icmp_error_inside_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_icmp_error_outside_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_icmp_query_inside_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_icmp_query_inside_input_exception.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_icmp_query_outside_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_tcp_inside_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_tcp_inside_input_exceptions.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_tcp_outside_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_udp_inside_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_udp_inside_input_exceptions.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ipv4_udp_outside_input.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_logging.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_ports.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_show.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@vnet/vcgn/$(DEPDIR)/cnat_syslog.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_util.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/cnat_v4_functions.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/index_list.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/spp_platform_trace_log.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vcgn/$(DEPDIR)/vcgn_classify.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vxlan/$(DEPDIR)/decap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vxlan/$(DEPDIR)/encap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@vnet/vxlan/$(DEPDIR)/vxlan.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf vnet/.libs vnet/_libs + -rm -rf vnet/classify/.libs vnet/classify/_libs + -rm -rf vnet/devices/dpdk/.libs vnet/devices/dpdk/_libs + -rm -rf vnet/devices/ssvm/.libs vnet/devices/ssvm/_libs + -rm -rf vnet/devices/virtio/.libs vnet/devices/virtio/_libs + -rm -rf vnet/dhcp/.libs vnet/dhcp/_libs + -rm -rf vnet/dhcpv6/.libs vnet/dhcpv6/_libs + -rm -rf vnet/ethernet/.libs vnet/ethernet/_libs + -rm -rf vnet/flow/.libs vnet/flow/_libs + -rm -rf vnet/gre/.libs vnet/gre/_libs + -rm -rf vnet/hdlc/.libs vnet/hdlc/_libs + -rm -rf vnet/ip/.libs vnet/ip/_libs + -rm -rf vnet/ipsec/.libs vnet/ipsec/_libs + -rm -rf vnet/l2/.libs vnet/l2/_libs + -rm -rf vnet/l2tp/.libs vnet/l2tp/_libs + -rm -rf vnet/lawful-intercept/.libs vnet/lawful-intercept/_libs + -rm -rf vnet/lisp-gpe/.libs vnet/lisp-gpe/_libs + -rm -rf vnet/llc/.libs vnet/llc/_libs + -rm -rf vnet/map/.libs vnet/map/_libs + -rm -rf vnet/mpls-gre/.libs vnet/mpls-gre/_libs + -rm -rf vnet/nsh-gre/.libs vnet/nsh-gre/_libs + -rm -rf vnet/nsh-vxlan-gpe/.libs vnet/nsh-vxlan-gpe/_libs + -rm -rf vnet/osi/.libs vnet/osi/_libs + -rm -rf vnet/pg/.libs vnet/pg/_libs + -rm -rf vnet/plugin/.libs vnet/plugin/_libs + -rm -rf vnet/policer/.libs 
vnet/policer/_libs + -rm -rf vnet/ppp/.libs vnet/ppp/_libs + -rm -rf vnet/snap/.libs vnet/snap/_libs + -rm -rf vnet/sr/.libs vnet/sr/_libs + -rm -rf vnet/srp/.libs vnet/srp/_libs + -rm -rf vnet/unix/.libs vnet/unix/_libs + -rm -rf vnet/vcgn/.libs vnet/vcgn/_libs + -rm -rf vnet/vxlan/.libs vnet/vxlan/_libs + +distclean-libtool: + -rm -f libtool config.lt +install-nobase_includeHEADERS: $(nobase_include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nobase_include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)/$$dir"; }; \ + echo " $(INSTALL_HEADER) $$xfiles '$(DESTDIR)$(includedir)/$$dir'"; \ + $(INSTALL_HEADER) $$xfiles "$(DESTDIR)$(includedir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nobase_include_HEADERS)'; test -n "$(includedir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! 
-s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +distdir: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__post_remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." 
>&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz + $(am__post_remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. +distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build \ + && ../configure \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + --srcdir=.. --prefix="$$dc_install_base" \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f example/$(DEPDIR)/$(am__dirstamp) + -rm -f example/$(am__dirstamp) + -rm -f vnet/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/$(am__dirstamp) + -rm -f vnet/classify/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/classify/$(am__dirstamp) + -rm -f vnet/devices/dpdk/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/devices/dpdk/$(am__dirstamp) + -rm -f vnet/devices/ssvm/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/devices/ssvm/$(am__dirstamp) + -rm -f vnet/devices/virtio/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/devices/virtio/$(am__dirstamp) + -rm -f vnet/dhcp/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/dhcp/$(am__dirstamp) + -rm -f vnet/dhcpv6/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/dhcpv6/$(am__dirstamp) + -rm -f vnet/ethernet/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/ethernet/$(am__dirstamp) + -rm -f vnet/flow/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/flow/$(am__dirstamp) + -rm -f vnet/gre/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/gre/$(am__dirstamp) + -rm -f vnet/hdlc/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/hdlc/$(am__dirstamp) + -rm -f vnet/ip/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/ip/$(am__dirstamp) + -rm -f vnet/ipsec/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/ipsec/$(am__dirstamp) + -rm -f vnet/l2/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/l2/$(am__dirstamp) + -rm -f vnet/l2tp/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/l2tp/$(am__dirstamp) + -rm -f vnet/lawful-intercept/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/lawful-intercept/$(am__dirstamp) + -rm -f vnet/lisp-gpe/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/lisp-gpe/$(am__dirstamp) + -rm -f vnet/llc/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/llc/$(am__dirstamp) + -rm -f vnet/map/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/map/$(am__dirstamp) + -rm -f vnet/mpls-gre/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/mpls-gre/$(am__dirstamp) + -rm -f vnet/nsh-gre/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/nsh-gre/$(am__dirstamp) + -rm -f vnet/nsh-vxlan-gpe/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/nsh-vxlan-gpe/$(am__dirstamp) + -rm -f vnet/osi/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/osi/$(am__dirstamp) + -rm -f vnet/pg/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/pg/$(am__dirstamp) + -rm -f vnet/plugin/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/plugin/$(am__dirstamp) + -rm 
-f vnet/policer/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/policer/$(am__dirstamp) + -rm -f vnet/ppp/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/ppp/$(am__dirstamp) + -rm -f vnet/snap/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/snap/$(am__dirstamp) + -rm -f vnet/sr/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/sr/$(am__dirstamp) + -rm -f vnet/srp/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/srp/$(am__dirstamp) + -rm -f vnet/unix/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/unix/$(am__dirstamp) + -rm -f vnet/vcgn/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/vcgn/$(am__dirstamp) + -rm -f vnet/vxlan/$(DEPDIR)/$(am__dirstamp) + -rm -f vnet/vxlan/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf example/$(DEPDIR) vnet/$(DEPDIR) vnet/classify/$(DEPDIR) vnet/devices/dpdk/$(DEPDIR) vnet/devices/ssvm/$(DEPDIR) vnet/devices/virtio/$(DEPDIR) vnet/dhcp/$(DEPDIR) vnet/dhcpv6/$(DEPDIR) vnet/ethernet/$(DEPDIR) vnet/flow/$(DEPDIR) vnet/gre/$(DEPDIR) vnet/hdlc/$(DEPDIR) vnet/ip/$(DEPDIR) vnet/ipsec/$(DEPDIR) vnet/l2/$(DEPDIR) vnet/l2tp/$(DEPDIR) vnet/lawful-intercept/$(DEPDIR) vnet/lisp-gpe/$(DEPDIR) vnet/llc/$(DEPDIR) vnet/map/$(DEPDIR) vnet/mpls-gre/$(DEPDIR) vnet/nsh-gre/$(DEPDIR) vnet/nsh-vxlan-gpe/$(DEPDIR) vnet/osi/$(DEPDIR) vnet/pg/$(DEPDIR) vnet/plugin/$(DEPDIR) vnet/policer/$(DEPDIR) vnet/ppp/$(DEPDIR) vnet/snap/$(DEPDIR) vnet/sr/$(DEPDIR) vnet/srp/$(DEPDIR) vnet/unix/$(DEPDIR) vnet/vcgn/$(DEPDIR) vnet/vxlan/$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-nobase_includeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -rf example/$(DEPDIR) vnet/$(DEPDIR) vnet/classify/$(DEPDIR) vnet/devices/dpdk/$(DEPDIR) vnet/devices/ssvm/$(DEPDIR) vnet/devices/virtio/$(DEPDIR) vnet/dhcp/$(DEPDIR) vnet/dhcpv6/$(DEPDIR) vnet/ethernet/$(DEPDIR) vnet/flow/$(DEPDIR) vnet/gre/$(DEPDIR) vnet/hdlc/$(DEPDIR) vnet/ip/$(DEPDIR) vnet/ipsec/$(DEPDIR) vnet/l2/$(DEPDIR) vnet/l2tp/$(DEPDIR) vnet/lawful-intercept/$(DEPDIR) vnet/lisp-gpe/$(DEPDIR) vnet/llc/$(DEPDIR) vnet/map/$(DEPDIR) vnet/mpls-gre/$(DEPDIR) vnet/nsh-gre/$(DEPDIR) vnet/nsh-vxlan-gpe/$(DEPDIR) vnet/osi/$(DEPDIR) vnet/pg/$(DEPDIR) vnet/plugin/$(DEPDIR) vnet/policer/$(DEPDIR) vnet/ppp/$(DEPDIR) vnet/snap/$(DEPDIR) vnet/sr/$(DEPDIR) vnet/srp/$(DEPDIR) vnet/unix/$(DEPDIR) vnet/vcgn/$(DEPDIR) vnet/vxlan/$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES uninstall-nobase_includeHEADERS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am 
am--refresh check check-am clean \ + clean-cscope clean-generic clean-libLTLIBRARIES clean-libtool \ + clean-noinstPROGRAMS cscope cscopelist-am ctags ctags-am dist \ + dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \ + dist-xz dist-zip distcheck distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags \ + distcleancheck distdir distuninstallcheck dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man \ + install-nobase_includeHEADERS install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES \ + uninstall-nobase_includeHEADERS + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/vnet/aclocal.m4 b/vnet/aclocal.m4 new file mode 100644 index 00000000000..b08df583726 --- /dev/null +++ b/vnet/aclocal.m4 @@ -0,0 +1,9771 @@ +# generated automatically by aclocal 1.14.1 -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, +[m4_warning([this file was generated for autoconf 2.69. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +m4_define([_LT_COPYING], [dnl +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is part of GNU Libtool. +# +# GNU Libtool is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. 
+# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, or +# obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +]) + +# serial 57 LT_INIT + + +# LT_PREREQ(VERSION) +# ------------------ +# Complain and exit if this libtool version is less that VERSION. +m4_defun([LT_PREREQ], +[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, + [m4_default([$3], + [m4_fatal([Libtool version $1 or higher is required], + 63)])], + [$2])]) + + +# _LT_CHECK_BUILDDIR +# ------------------ +# Complain if the absolute build directory name contains unusual characters +m4_defun([_LT_CHECK_BUILDDIR], +[case `pwd` in + *\ * | *\ *) + AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; +esac +]) + + +# LT_INIT([OPTIONS]) +# ------------------ +AC_DEFUN([LT_INIT], +[AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT +AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +AC_BEFORE([$0], [LT_LANG])dnl +AC_BEFORE([$0], [LT_OUTPUT])dnl +AC_BEFORE([$0], [LTDL_INIT])dnl +m4_require([_LT_CHECK_BUILDDIR])dnl + +dnl Autoconf doesn't catch unexpanded LT_ macros by default: +m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl +m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl +dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 +dnl unless we require an AC_DEFUNed macro: +AC_REQUIRE([LTOPTIONS_VERSION])dnl +AC_REQUIRE([LTSUGAR_VERSION])dnl +AC_REQUIRE([LTVERSION_VERSION])dnl +AC_REQUIRE([LTOBSOLETE_VERSION])dnl +m4_require([_LT_PROG_LTMAIN])dnl + +_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) + +dnl Parse OPTIONS +_LT_SET_OPTIONS([$0], [$1]) + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ltmain" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +_LT_SETUP + +# Only expand once: +m4_define([LT_INIT]) +])# LT_INIT + +# Old names: +AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) +AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PROG_LIBTOOL], []) +dnl AC_DEFUN([AM_PROG_LIBTOOL], []) + + +# _LT_CC_BASENAME(CC) +# ------------------- +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +m4_defun([_LT_CC_BASENAME], +[for cc_temp in $1""; do + case $cc_temp in + compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; + distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +]) + + +# _LT_FILEUTILS_DEFAULTS +# ---------------------- +# It is okay to use these file commands and assume they have been set +# sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'. 
+m4_defun([_LT_FILEUTILS_DEFAULTS], +[: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} +])# _LT_FILEUTILS_DEFAULTS + + +# _LT_SETUP +# --------- +m4_defun([_LT_SETUP], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl + +_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl +dnl +_LT_DECL([], [host_alias], [0], [The host system])dnl +_LT_DECL([], [host], [0])dnl +_LT_DECL([], [host_os], [0])dnl +dnl +_LT_DECL([], [build_alias], [0], [The build system])dnl +_LT_DECL([], [build], [0])dnl +_LT_DECL([], [build_os], [0])dnl +dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +dnl +AC_REQUIRE([AC_PROG_LN_S])dnl +test -z "$LN_S" && LN_S="ln -s" +_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl +dnl +AC_REQUIRE([LT_CMD_MAX_LEN])dnl +_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl +_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl +dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl +m4_require([_LT_CMD_RELOAD])dnl +m4_require([_LT_CHECK_MAGIC_METHOD])dnl +m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl +m4_require([_LT_CMD_OLD_ARCHIVE])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_WITH_SYSROOT])dnl + +_LT_CONFIG_LIBTOOL_INIT([ +# See if we are running on zsh, and set the options which allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi +]) +if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + +_LT_CHECK_OBJDIR + +m4_require([_LT_TAG_COMPILER])dnl + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except MSVC, +# which needs '.lib'). +libext=a + +with_gnu_ld="$lt_cv_prog_gnu_ld" + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +_LT_CC_BASENAME([$compiler]) + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + _LT_PATH_MAGIC + fi + ;; +esac + +# Use C for the default configuration in the libtool script +LT_SUPPORTED_TAG([CC]) +_LT_LANG_C_CONFIG +_LT_LANG_DEFAULT_CONFIG +_LT_CONFIG_COMMANDS +])# _LT_SETUP + + +# _LT_PREPARE_SED_QUOTE_VARS +# -------------------------- +# Define a few sed substitution that help us do robust quoting. +m4_defun([_LT_PREPARE_SED_QUOTE_VARS], +[# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([["`\\]]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. 
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. +delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' +]) + +# _LT_PROG_LTMAIN +# --------------- +# Note that this code is called both from `configure', and `config.status' +# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, +# `config.status' has no value for ac_aux_dir unless we are using Automake, +# so we pass a copy along to make sure it has a sensible value anyway. +m4_defun([_LT_PROG_LTMAIN], +[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl +_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) +ltmain="$ac_aux_dir/ltmain.sh" +])# _LT_PROG_LTMAIN + + + +# So that we can recreate a full libtool script including additional +# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS +# in macros and then make a single call at the end using the `libtool' +# label. + + +# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) +# ---------------------------------------- +# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL_INIT], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_INIT], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_INIT]) + + +# _LT_CONFIG_LIBTOOL([COMMANDS]) +# ------------------------------ +# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) + + +# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) +# ----------------------------------------------------- +m4_defun([_LT_CONFIG_SAVE_COMMANDS], +[_LT_CONFIG_LIBTOOL([$1]) +_LT_CONFIG_LIBTOOL_INIT([$2]) +]) + + +# _LT_FORMAT_COMMENT([COMMENT]) +# ----------------------------- +# Add leading comment marks to the start of each line, and a trailing +# full-stop to the whole comment if one is not present already. +m4_define([_LT_FORMAT_COMMENT], +[m4_ifval([$1], [ +m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], + [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) +)]) + + + + + +# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) +# ------------------------------------------------------------------- +# CONFIGNAME is the name given to the value in the libtool script. +# VARNAME is the (base) name used in the configure script. +# VALUE may be 0, 1 or 2 for a computed quote escaped value based on +# VARNAME. Any other value will be used directly. 
+m4_define([_LT_DECL], +[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], + [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], + [m4_ifval([$1], [$1], [$2])]) + lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) + m4_ifval([$4], + [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) + lt_dict_add_subkey([lt_decl_dict], [$2], + [tagged?], [m4_ifval([$5], [yes], [no])])]) +]) + + +# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) +# -------------------------------------------------------- +m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) + + +# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_tag_varnames], +[_lt_decl_filter([tagged?], [yes], $@)]) + + +# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) +# --------------------------------------------------------- +m4_define([_lt_decl_filter], +[m4_case([$#], + [0], [m4_fatal([$0: too few arguments: $#])], + [1], [m4_fatal([$0: too few arguments: $#: $1])], + [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], + [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], + [lt_dict_filter([lt_decl_dict], $@)])[]dnl +]) + + +# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) +# -------------------------------------------------- +m4_define([lt_decl_quote_varnames], +[_lt_decl_filter([value], [1], $@)]) + + +# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_dquote_varnames], +[_lt_decl_filter([value], [2], $@)]) + + +# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_varnames_tagged], +[m4_assert([$# <= 2])dnl +_$0(m4_quote(m4_default([$1], [[, ]])), + m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), + m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) +m4_define([_lt_decl_varnames_tagged], +[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) + + +# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_all_varnames], +[_$0(m4_quote(m4_default([$1], [[, ]])), + m4_if([$2], [], + m4_quote(lt_decl_varnames), + m4_quote(m4_shift($@))))[]dnl +]) +m4_define([_lt_decl_all_varnames], +[lt_join($@, lt_decl_varnames_tagged([$1], + lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl +]) + + +# _LT_CONFIG_STATUS_DECLARE([VARNAME]) +# ------------------------------------ +# Quote a variable value, and forward it to `config.status' so that its +# declaration there will have the same value as in `configure'. VARNAME +# must have a single quote delimited value for this to work. +m4_define([_LT_CONFIG_STATUS_DECLARE], +[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) + + +# _LT_CONFIG_STATUS_DECLARATIONS +# ------------------------------ +# We delimit libtool config variables with single quotes, so when +# we write them to config.status, we have to be sure to quote all +# embedded single quotes properly. 
In configure, this macro expands +# each variable declared with _LT_DECL (and _LT_TAGDECL) into: +# +# <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`' +m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], +[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), + [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAGS +# ---------------- +# Output comment and list of tags supported by the script +m4_defun([_LT_LIBTOOL_TAGS], +[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl +available_tags="_LT_TAGS"dnl +]) + + +# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) +# ----------------------------------- +# Extract the dictionary values for VARNAME (optionally with TAG) and +# expand to a commented shell variable setting: +# +# # Some comment about what VAR is for. +# visible_name=$lt_internal_name +m4_define([_LT_LIBTOOL_DECLARE], +[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], + [description])))[]dnl +m4_pushdef([_libtool_name], + m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl +m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), + [0], [_libtool_name=[$]$1], + [1], [_libtool_name=$lt_[]$1], + [2], [_libtool_name=$lt_[]$1], + [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl +m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl +]) + + +# _LT_LIBTOOL_CONFIG_VARS +# ----------------------- +# Produce commented declarations of non-tagged libtool config variables +# suitable for insertion in the LIBTOOL CONFIG section of the `libtool' +# script. Tagged libtool config variables (even for the LIBTOOL CONFIG +# section) are produced by _LT_LIBTOOL_TAG_VARS. +m4_defun([_LT_LIBTOOL_CONFIG_VARS], +[m4_foreach([_lt_var], + m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAG_VARS(TAG) +# ------------------------- +m4_define([_LT_LIBTOOL_TAG_VARS], +[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) + + +# _LT_TAGVAR(VARNAME, [TAGNAME]) +# ------------------------------ +m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) + + +# _LT_CONFIG_COMMANDS +# ------------------- +# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of +# variables for single and double quote escaping we saved from calls +# to _LT_DECL, we can put quote escaped variables declarations +# into `config.status', and then the shell code to quote escape them in +# for loops in `config.status'. Finally, any additional code accumulated +# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. +m4_defun([_LT_CONFIG_COMMANDS], +[AC_PROVIDE_IFELSE([LT_OUTPUT], + dnl If the libtool generation code has been placed in $CONFIG_LT, + dnl instead of duplicating it all over again into config.status, + dnl then we will have config.status run $CONFIG_LT later, so it + dnl needs to know what name is stored there: + [AC_CONFIG_COMMANDS([libtool], + [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], + dnl If the libtool generation code is destined for config.status, + dnl expand the accumulated commands and init code now: + [AC_CONFIG_COMMANDS([libtool], + [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) +])#_LT_CONFIG_COMMANDS + + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], +[ + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +_LT_CONFIG_STATUS_DECLARATIONS +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$[]1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_quote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_dquote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +_LT_OUTPUT_LIBTOOL_INIT +]) + +# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) +# ------------------------------------ +# Generate a child script FILE with all initialization necessary to +# reuse the environment learned by the parent script, and make the +# file executable. If COMMENT is supplied, it is inserted after the +# `#!' sequence but before initialization text begins. After this +# macro, additional text can be appended to FILE to form the body of +# the child script. The macro ends with non-zero status if the +# file could not be fully written (such as if the disk is full). +m4_ifdef([AS_INIT_GENERATED], +[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], +[m4_defun([_LT_GENERATED_FILE_INIT], +[m4_require([AS_PREPARE])]dnl +[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl +[lt_write_fail=0 +cat >$1 <<_ASEOF || lt_write_fail=1 +#! $SHELL +# Generated by $as_me. +$2 +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$1 <<\_ASEOF || lt_write_fail=1 +AS_SHELL_SANITIZE +_AS_PREPARE +exec AS_MESSAGE_FD>&1 +_ASEOF +test $lt_write_fail = 0 && chmod +x $1[]dnl +m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT + +# LT_OUTPUT +# --------- +# This macro allows early generation of the libtool script (before +# AC_OUTPUT is called), incase it is used in configure for compilation +# tests. +AC_DEFUN([LT_OUTPUT], +[: ${CONFIG_LT=./config.lt} +AC_MSG_NOTICE([creating $CONFIG_LT]) +_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], +[# Run this file to recreate a libtool stub with the current configuration.]) + +cat >>"$CONFIG_LT" <<\_LTEOF +lt_cl_silent=false +exec AS_MESSAGE_LOG_FD>>config.log +{ + echo + AS_BOX([Running $as_me.]) +} >&AS_MESSAGE_LOG_FD + +lt_cl_help="\ +\`$as_me' creates a local libtool stub from the current configuration, +for use in further configure time tests before the real libtool is +generated. + +Usage: $[0] [[OPTIONS]] + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + +Report bugs to <bug-libtool@gnu.org>." + +lt_cl_version="\ +m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl +m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) +configured by $[0], generated by m4_PACKAGE_STRING. + +Copyright (C) 2011 Free Software Foundation, Inc. 
+This config.lt script is free software; the Free Software Foundation +gives unlimited permision to copy, distribute and modify it." + +while test $[#] != 0 +do + case $[1] in + --version | --v* | -V ) + echo "$lt_cl_version"; exit 0 ;; + --help | --h* | -h ) + echo "$lt_cl_help"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --quiet | --q* | --silent | --s* | -q ) + lt_cl_silent=: ;; + + -*) AC_MSG_ERROR([unrecognized option: $[1] +Try \`$[0] --help' for more information.]) ;; + + *) AC_MSG_ERROR([unrecognized argument: $[1] +Try \`$[0] --help' for more information.]) ;; + esac + shift +done + +if $lt_cl_silent; then + exec AS_MESSAGE_FD>/dev/null +fi +_LTEOF + +cat >>"$CONFIG_LT" <<_LTEOF +_LT_OUTPUT_LIBTOOL_COMMANDS_INIT +_LTEOF + +cat >>"$CONFIG_LT" <<\_LTEOF +AC_MSG_NOTICE([creating $ofile]) +_LT_OUTPUT_LIBTOOL_COMMANDS +AS_EXIT(0) +_LTEOF +chmod +x "$CONFIG_LT" + +# configure is writing to config.log, but config.lt does its own redirection, +# appending to config.log, which fails on DOS, as config.log is still kept +# open by configure. Here we exec the FD to /dev/null, effectively closing +# config.log, so it can be properly (re)opened and appended to by config.lt. +lt_cl_success=: +test "$silent" = yes && + lt_config_lt_args="$lt_config_lt_args --quiet" +exec AS_MESSAGE_LOG_FD>/dev/null +$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false +exec AS_MESSAGE_LOG_FD>>config.log +$lt_cl_success || AS_EXIT(1) +])# LT_OUTPUT + + +# _LT_CONFIG(TAG) +# --------------- +# If TAG is the built-in tag, create an initial libtool script with a +# default configuration from the untagged config vars. Otherwise add code +# to config.status for appending the configuration named by TAG from the +# matching tagged config vars. +m4_defun([_LT_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_CONFIG_SAVE_COMMANDS([ + m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl + m4_if(_LT_TAG, [C], [ + # See if we are running on zsh, and set the options which allow our + # commands through without removal of \ escapes. + if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST + fi + + cfgfile="${ofile}T" + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL + +# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +_LT_COPYING +_LT_LIBTOOL_TAGS + +# ### BEGIN LIBTOOL CONFIG +_LT_LIBTOOL_CONFIG_VARS +_LT_LIBTOOL_TAG_VARS +# ### END LIBTOOL CONFIG + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + _LT_PROG_LTMAIN + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? 
+ sed '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + _LT_PROG_REPLACE_SHELLFNS + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" +], +[cat <<_LT_EOF >> "$ofile" + +dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded +dnl in a comment (ie after a #). +# ### BEGIN LIBTOOL TAG CONFIG: $1 +_LT_LIBTOOL_TAG_VARS(_LT_TAG) +# ### END LIBTOOL TAG CONFIG: $1 +_LT_EOF +])dnl /m4_if +], +[m4_if([$1], [], [ + PACKAGE='$PACKAGE' + VERSION='$VERSION' + TIMESTAMP='$TIMESTAMP' + RM='$RM' + ofile='$ofile'], []) +])dnl /_LT_CONFIG_SAVE_COMMANDS +])# _LT_CONFIG + + +# LT_SUPPORTED_TAG(TAG) +# --------------------- +# Trace this macro to discover what tags are supported by the libtool +# --tag option, using: +# autoconf --trace 'LT_SUPPORTED_TAG:$1' +AC_DEFUN([LT_SUPPORTED_TAG], []) + + +# C support is built-in for now +m4_define([_LT_LANG_C_enabled], []) +m4_define([_LT_TAGS], []) + + +# LT_LANG(LANG) +# ------------- +# Enable libtool support for the given language if not already enabled. +AC_DEFUN([LT_LANG], +[AC_BEFORE([$0], [LT_OUTPUT])dnl +m4_case([$1], + [C], [_LT_LANG(C)], + [C++], [_LT_LANG(CXX)], + [Go], [_LT_LANG(GO)], + [Java], [_LT_LANG(GCJ)], + [Fortran 77], [_LT_LANG(F77)], + [Fortran], [_LT_LANG(FC)], + [Windows Resource], [_LT_LANG(RC)], + [m4_ifdef([_LT_LANG_]$1[_CONFIG], + [_LT_LANG($1)], + [m4_fatal([$0: unsupported language: "$1"])])])dnl +])# LT_LANG + + +# _LT_LANG(LANGNAME) +# ------------------ +m4_defun([_LT_LANG], +[m4_ifdef([_LT_LANG_]$1[_enabled], [], + [LT_SUPPORTED_TAG([$1])dnl + m4_append([_LT_TAGS], [$1 ])dnl + m4_define([_LT_LANG_]$1[_enabled], [])dnl + _LT_LANG_$1_CONFIG($1)])dnl +])# _LT_LANG + + +m4_ifndef([AC_PROG_GO], [ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_GO. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. # +m4_defun([AC_PROG_GO], +[AC_LANG_PUSH(Go)dnl +AC_ARG_VAR([GOC], [Go compiler command])dnl +AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl +_AC_ARG_VAR_LDFLAGS()dnl +AC_CHECK_TOOL(GOC, gccgo) +if test -z "$GOC"; then + if test -n "$ac_tool_prefix"; then + AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) + fi +fi +if test -z "$GOC"; then + AC_CHECK_PROG(GOC, gccgo, gccgo, false) +fi +])#m4_defun +])#m4_ifndef + + +# _LT_LANG_DEFAULT_CONFIG +# ----------------------- +m4_defun([_LT_LANG_DEFAULT_CONFIG], +[AC_PROVIDE_IFELSE([AC_PROG_CXX], + [LT_LANG(CXX)], + [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) + +AC_PROVIDE_IFELSE([AC_PROG_F77], + [LT_LANG(F77)], + [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [LT_LANG(FC)], + [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) + +dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal +dnl pulling things in needlessly. 
+AC_PROVIDE_IFELSE([AC_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([LT_PROG_GCJ], + [LT_LANG(GCJ)], + [m4_ifdef([AC_PROG_GCJ], + [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([A][M_PROG_GCJ], + [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([LT_PROG_GCJ], + [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) + +AC_PROVIDE_IFELSE([AC_PROG_GO], + [LT_LANG(GO)], + [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) + +AC_PROVIDE_IFELSE([LT_PROG_RC], + [LT_LANG(RC)], + [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) +])# _LT_LANG_DEFAULT_CONFIG + +# Obsolete macros: +AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) +AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) +AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) +AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) +AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_CXX], []) +dnl AC_DEFUN([AC_LIBTOOL_F77], []) +dnl AC_DEFUN([AC_LIBTOOL_FC], []) +dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) +dnl AC_DEFUN([AC_LIBTOOL_RC], []) + + +# _LT_TAG_COMPILER +# ---------------- +m4_defun([_LT_TAG_COMPILER], +[AC_REQUIRE([AC_PROG_CC])dnl + +_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl +_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl +_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl +_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC +])# _LT_TAG_COMPILER + + +# _LT_COMPILER_BOILERPLATE +# ------------------------ +# Check for compiler boilerplate output or warnings with +# the simple compiler test code. +m4_defun([_LT_COMPILER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* +])# _LT_COMPILER_BOILERPLATE + + +# _LT_LINKER_BOILERPLATE +# ---------------------- +# Check for linker boilerplate output or warnings with +# the simple link test code. 
+m4_defun([_LT_LINKER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* +])# _LT_LINKER_BOILERPLATE + +# _LT_REQUIRED_DARWIN_CHECKS +# ------------------------- +m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ + case $host_os in + rhapsody* | darwin*) + AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) + AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) + AC_CHECK_TOOL([LIPO], [lipo], [:]) + AC_CHECK_TOOL([OTOOL], [otool], [:]) + AC_CHECK_TOOL([OTOOL64], [otool64], [:]) + _LT_DECL([], [DSYMUTIL], [1], + [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) + _LT_DECL([], [NMEDIT], [1], + [Tool to change global to local symbols on Mac OS X]) + _LT_DECL([], [LIPO], [1], + [Tool to manipulate fat objects and archives on Mac OS X]) + _LT_DECL([], [OTOOL], [1], + [ldd/readelf like tool for Mach-O binaries on Mac OS X]) + _LT_DECL([], [OTOOL64], [1], + [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) + + AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], + [lt_cv_apple_cc_single_mod=no + if test -z "${LT_MULTI_MODULE}"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. + rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test $_lt_result -eq 0; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi]) + + AC_CACHE_CHECK([for -exported_symbols_list linker flag], + [lt_cv_ld_exported_symbols_list], + [lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [lt_cv_ld_exported_symbols_list=yes], + [lt_cv_ld_exported_symbols_list=no]) + LDFLAGS="$save_LDFLAGS" + ]) + + AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], + [lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD + echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD + $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD + echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD + $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? 
+ if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + ]) + case $host_os in + rhapsody* | darwin1.[[012]]) + _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + darwin*) # darwin 5.x on + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? + case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + 10.[[012]]*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + 10.*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test "$lt_cv_apple_cc_single_mod" = "yes"; then + _lt_dar_single_mod='$single_module' + fi + if test "$lt_cv_ld_exported_symbols_list" = "yes"; then + _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' + fi + if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac +]) + + +# _LT_DARWIN_LINKER_FEATURES([TAG]) +# --------------------------------- +# Checks for linker and compiler features on darwin +m4_defun([_LT_DARWIN_LINKER_FEATURES], +[ + m4_require([_LT_REQUIRED_DARWIN_CHECKS]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_automatic, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + if test "$lt_cv_ld_force_load" = "yes"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], + [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='' + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined" + case $cc_basename in + ifort*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test "$_lt_dar_can_shared" = "yes"; then + output_verbose_link_cmd=func_echo_all + _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs 
\$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + m4_if([$1], [CXX], +[ if test "$lt_cv_apple_cc_single_mod" != "yes"; then + _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}" + _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}" + fi +],[]) + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi +]) + +# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) +# ---------------------------------- +# Links a minimal program and checks the executable +# for the system default hardcoded library path. In most cases, +# this is /usr/lib:/lib, but when the MPI compilers are used +# the location of the communication and MPI libs are included too. +# If we don't find anything, use the default library path according +# to the aix ld manual. +# Store the results from the different compilers for each TAGNAME. +# Allow to override them for all tags through lt_cv_aix_libpath. +m4_defun([_LT_SYS_MODULE_PATH_AIX], +[m4_require([_LT_DECL_SED])dnl +if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ + lt_aix_libpath_sed='[ + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }]' + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi],[]) + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib" + fi + ]) + aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) +fi +])# _LT_SYS_MODULE_PATH_AIX + + +# _LT_SHELL_INIT(ARG) +# ------------------- +m4_define([_LT_SHELL_INIT], +[m4_divert_text([M4SH-INIT], [$1 +])])# _LT_SHELL_INIT + + + +# _LT_PROG_ECHO_BACKSLASH +# ----------------------- +# Find how we can fake an echo command that does not interpret backslash. +# In particular, with Autoconf 2.60 or later we add some code to the start +# of the generated configure script which will find a shell with a builtin +# printf (which we can use as an echo command). +m4_defun([_LT_PROG_ECHO_BACKSLASH], +[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +AC_MSG_CHECKING([how to print strings]) +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$[]1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. 
+func_echo_all () +{ + $ECHO "$*" +} + +case "$ECHO" in + printf*) AC_MSG_RESULT([printf]) ;; + print*) AC_MSG_RESULT([print -r]) ;; + *) AC_MSG_RESULT([cat]) ;; +esac + +m4_ifdef([_AS_DETECT_SUGGESTED], +[_AS_DETECT_SUGGESTED([ + test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test "X`printf %s $ECHO`" = "X$ECHO" \ + || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) + +_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) +_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) +])# _LT_PROG_ECHO_BACKSLASH + + +# _LT_WITH_SYSROOT +# ---------------- +AC_DEFUN([_LT_WITH_SYSROOT], +[AC_MSG_CHECKING([for sysroot]) +AC_ARG_WITH([sysroot], +[ --with-sysroot[=DIR] Search for dependent libraries within DIR + (or the compiler's sysroot if not specified).], +[], [with_sysroot=no]) + +dnl lt_sysroot will always be passed unquoted. We quote it here +dnl in case the user passed a directory name. +lt_sysroot= +case ${with_sysroot} in #( + yes) + if test "$GCC" = yes; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + AC_MSG_RESULT([${with_sysroot}]) + AC_MSG_ERROR([The sysroot must be an absolute path.]) + ;; +esac + + AC_MSG_RESULT([${lt_sysroot:-no}]) +_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl +[dependent libraries, and in which our libraries should be installed.])]) + +# _LT_ENABLE_LOCK +# --------------- +m4_defun([_LT_ENABLE_LOCK], +[AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock], + [avoid locking (might break parallel builds)])]) +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE="32" + ;; + *ELF-64*) + HPUX_IA64_MODE="64" + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out which ABI we are using. + echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + if test "$lt_cv_prog_gnu_ld" = yes; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out which ABI we are using. 
+ echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + case `/usr/bin/file conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_PUSH(C) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_POP]) + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; +*-*solaris*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. + if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD="${LD-ld}_sol2" + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks="$enable_libtool_lock" +])# _LT_ENABLE_LOCK + + +# _LT_PROG_AR +# ----------- +m4_defun([_LT_PROG_AR], +[AC_CHECK_TOOLS(AR, [ar], false) +: ${AR=ar} +: ${AR_FLAGS=cru} +_LT_DECL([], [AR], [1], [The archiver]) +_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive]) + +AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], + [lt_cv_ar_at_file=no + AC_COMPILE_IFELSE([AC_LANG_PROGRAM], + [echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([lt_ar_try]) + if test "$ac_status" -eq 0; then + # Ensure the archiver fails upon bogus file names. 
+ rm -f conftest.$ac_objext libconftest.a + AC_TRY_EVAL([lt_ar_try]) + if test "$ac_status" -ne 0; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + ]) + ]) + +if test "x$lt_cv_ar_at_file" = xno; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi +_LT_DECL([], [archiver_list_spec], [1], + [How to feed a file listing to the archiver]) +])# _LT_PROG_AR + + +# _LT_CMD_OLD_ARCHIVE +# ------------------- +m4_defun([_LT_CMD_OLD_ARCHIVE], +[_LT_PROG_AR + +AC_CHECK_TOOL(STRIP, strip, :) +test -z "$STRIP" && STRIP=: +_LT_DECL([], [STRIP], [1], [A symbol stripping program]) + +AC_CHECK_TOOL(RANLIB, ranlib, :) +test -z "$RANLIB" && RANLIB=: +_LT_DECL([], [RANLIB], [1], + [Commands used to install an old-style archive]) + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac +_LT_DECL([], [old_postinstall_cmds], [2]) +_LT_DECL([], [old_postuninstall_cmds], [2]) +_LT_TAGDECL([], [old_archive_cmds], [2], + [Commands used to build an old-style archive]) +_LT_DECL([], [lock_old_archive_extraction], [0], + [Whether to use a lock for old archive extraction]) +])# _LT_CMD_OLD_ARCHIVE + + +# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------------------- +# Check whether the given compiler option works +AC_DEFUN([_LT_COMPILER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$3" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + fi + $RM conftest* +]) + +if test x"[$]$2" = xyes; then + m4_if([$5], , :, [$5]) +else + m4_if([$6], , :, [$6]) +fi +])# _LT_COMPILER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) + + +# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------- +# Check whether the given linker option works +AC_DEFUN([_LT_LINKER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $3" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&AS_MESSAGE_LOG_FD + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + else + $2=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" +]) + +if test x"[$]$2" = xyes; then + m4_if([$4], , :, [$4]) +else + m4_if([$5], , :, [$5]) +fi +])# _LT_LINKER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) + + +# LT_CMD_MAX_LEN +#--------------- +AC_DEFUN([LT_CMD_MAX_LEN], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +# find the maximum length of command line arguments +AC_MSG_CHECKING([the maximum length of command line arguments]) +AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl + i=0 + teststring="ABCD" + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) + # This has been around since 386BSD, at least. Likely further. 
+ if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. + lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8 ; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test $i != 17 # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac +]) +if test -n $lt_cv_sys_max_cmd_len ; then + AC_MSG_RESULT($lt_cv_sys_max_cmd_len) +else + AC_MSG_RESULT(none) +fi +max_cmd_len=$lt_cv_sys_max_cmd_len +_LT_DECL([], [max_cmd_len], [0], + [What is the maximum length of a command?]) +])# LT_CMD_MAX_LEN + +# Old name: +AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) + + +# _LT_HEADER_DLFCN +# ---------------- +m4_defun([_LT_HEADER_DLFCN], +[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl +])# _LT_HEADER_DLFCN + + +# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, +# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) +# ---------------------------------------------------------------- +m4_defun([_LT_TRY_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test "$cross_compiling" = yes; then : + [$4] +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +[#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisbility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +}] +_LT_EOF + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) $1 ;; + x$lt_dlneed_uscore) $2 ;; + x$lt_dlunknown|x*) $3 ;; + esac + else : + # compilation failed + $3 + fi +fi +rm -fr conftest* +])# _LT_TRY_DLOPEN_SELF + + +# LT_SYS_DLOPEN_SELF +# ------------------ +AC_DEFUN([LT_SYS_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen="dlopen" + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ + lt_cv_dlopen="dyld" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ]) + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen="shl_load"], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen="dlopen"], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + AC_CACHE_CHECK([whether a program can dlopen itself], + lt_cv_dlopen_self, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, + lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) + ]) + + if test "x$lt_cv_dlopen_self" = xyes; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + AC_CACHE_CHECK([whether a statically linked program can dlopen itself], + lt_cv_dlopen_self_static, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, + lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) + ]) + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi +_LT_DECL([dlopen_support], [enable_dlopen], [0], + [Whether dlopen is supported]) +_LT_DECL([dlopen_self], [enable_dlopen_self], [0], + [Whether dlopen of programs is supported]) +_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], + [Whether dlopen of statically linked programs is supported]) +])# LT_SYS_DLOPEN_SELF + +# Old name: +AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) + + +# _LT_COMPILER_C_O([TAGNAME]) +# --------------------------- +# Check to see if options -c and -o are simultaneously supported by compiler. +# This macro does not hard code the compiler like AC_PROG_CC_C_O. 
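Stripped of the autoconf plumbing, the macro defined next just compiles a trivial source file with "-c -o" into a subdirectory and treats a missing object file, or unexplained stderr output, as rejection (the real check additionally filters known compiler boilerplate out of stderr before deciding). A rough standalone sketch, with cc standing in for whatever $compiler configure is actually testing:

    mkdir -p out
    echo 'int dummy;' > conftest.c
    if ${CC:-cc} -c conftest.c -o out/conftest2.o 2>out/conftest.err &&
       test -s out/conftest2.o && ! test -s out/conftest.err; then
      echo "compiler accepts -c and -o together"
    fi
    rm -rf out conftest.c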
+m4_defun([_LT_COMPILER_C_O], +[m4_require([_LT_DECL_SED])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + fi + fi + chmod u+w . 2>&AS_MESSAGE_LOG_FD + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* +]) +_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], + [Does compiler simultaneously support -c and -o options?]) +])# _LT_COMPILER_C_O + + +# _LT_COMPILER_FILE_LOCKS([TAGNAME]) +# ---------------------------------- +# Check to see if we can do hard links to lock some files if needed +m4_defun([_LT_COMPILER_FILE_LOCKS], +[m4_require([_LT_ENABLE_LOCK])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_COMPILER_C_O([$1]) + +hard_links="nottested" +if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + AC_MSG_CHECKING([if we can lock with hard links]) + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + AC_MSG_RESULT([$hard_links]) + if test "$hard_links" = no; then + AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe]) + need_locks=warn + fi +else + need_locks=no +fi +_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) +])# _LT_COMPILER_FILE_LOCKS + + +# _LT_CHECK_OBJDIR +# ---------------- +m4_defun([_LT_CHECK_OBJDIR], +[AC_CACHE_CHECK([for objdir], [lt_cv_objdir], +[rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. 
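Condensed, the objdir probe amounts to "can a directory named .libs be created here?", with the _libs fallback assigned just below for file systems that reject leading dots; the winning name is then baked into LT_OBJDIR. A sketch:

    rm -f .libs 2>/dev/null
    if mkdir .libs 2>/dev/null && test -d .libs; then
      objdir=.libs
    else
      objdir=_libs      # e.g. MS-DOS style file systems
    fi
    rmdir .libs 2>/dev/null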
+ lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null]) +objdir=$lt_cv_objdir +_LT_DECL([], [objdir], [0], + [The name of the directory that contains temporary libtool files])dnl +m4_pattern_allow([LT_OBJDIR])dnl +AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/", + [Define to the sub-directory in which libtool stores uninstalled libraries.]) +])# _LT_CHECK_OBJDIR + + +# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) +# -------------------------------------- +# Check hardcoding attributes. +m4_defun([_LT_LINKER_HARDCODE_LIBPATH], +[AC_MSG_CHECKING([how to hardcode library paths into programs]) +_LT_TAGVAR(hardcode_action, $1)= +if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || + test -n "$_LT_TAGVAR(runpath_var, $1)" || + test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then + + # We can hardcode non-existent directories. + if test "$_LT_TAGVAR(hardcode_direct, $1)" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no && + test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then + # Linking always hardcodes the temporary library directory. + _LT_TAGVAR(hardcode_action, $1)=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + _LT_TAGVAR(hardcode_action, $1)=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + _LT_TAGVAR(hardcode_action, $1)=unsupported +fi +AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) + +if test "$_LT_TAGVAR(hardcode_action, $1)" = relink || + test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi +_LT_TAGDECL([], [hardcode_action], [0], + [How to hardcode a shared library path into an executable]) +])# _LT_LINKER_HARDCODE_LIBPATH + + +# _LT_CMD_STRIPLIB +# ---------------- +m4_defun([_LT_CMD_STRIPLIB], +[m4_require([_LT_DECL_EGREP]) +striplib= +old_striplib= +AC_MSG_CHECKING([whether stripping libraries is possible]) +if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) +else +# FIXME - insert some real tests, host_os isn't really good enough + case $host_os in + darwin*) + if test -n "$STRIP" ; then + striplib="$STRIP -x" + old_striplib="$STRIP -S" + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + ;; + *) + AC_MSG_RESULT([no]) + ;; + esac +fi +_LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) +_LT_DECL([], [striplib], [1]) +])# _LT_CMD_STRIPLIB + + +# _LT_SYS_DYNAMIC_LINKER([TAG]) +# ----------------------------- +# PORTME Fill in your ld.so characteristics +m4_defun([_LT_SYS_DYNAMIC_LINKER], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_OBJDUMP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +AC_MSG_CHECKING([dynamic linker characteristics]) +m4_if([$1], + [], [ +if test "$GCC" = yes; then + case $host_os in + darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; + *) lt_awk_arg="/^libraries:/" ;; + esac + case $host_os in + mingw* | cegcc*) 
lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;; + *) lt_sed_strip_eq="s,=/,/,g" ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary. + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" + else + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS=" "; FS="/|\n";} { + lt_foo=""; + lt_count=0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo="/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[[lt_foo]]++; } + if (lt_freq[[lt_foo]] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. + case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's,/\([[A-Za-z]]:\),\1,g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix[[4-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib<name>.so + # instead of lib<name>.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[[45]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + library_names_spec='${libname}.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... + sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec="$LIB" + if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. + if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[[23]].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[[01]]* | freebsdelf3.[[01]]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ + freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=yes + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... + postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[[3-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], + [lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ + LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], + [lt_cv_shlibpath_overrides_runpath=yes])]) + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + ]) + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
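The cached shlibpath_overrides_runpath probe above comes down to linking a throwaway program with an rpath pointing at a dummy directory and checking whether the linker recorded it as DT_RUNPATH rather than DT_RPATH; with DT_RUNPATH, LD_LIBRARY_PATH is searched first at run time, so the environment variable does override the hard-coded path. A hand-run equivalent, assuming gcc-style flags and GNU binutils (the macro builds the flag from hardcode_libdir_flag_spec instead):

    echo 'int main(void){return 0;}' > conftest.c
    ${CC:-cc} conftest.c -o conftest -Wl,-rpath,/nonexistent/libdir
    # A RUNPATH line here means LD_LIBRARY_PATH takes precedence over the rpath.
    objdump -p conftest | grep -E 'R(UN)?PATH'
    rm -f conftest conftest.c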
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
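As in the NetBSD entry above, the ELF-versus-a.out decision below feeds the literal token __ELF__ through the compiler's preprocessor: an ELF-targeting compiler predefines that macro, so the token is expanded away and the grep finds nothing; if the token survives, the toolchain is producing a.out. For example:

    # Prints "ELF" for an ELF toolchain, "a.out" otherwise (cc assumed).
    if echo __ELF__ | ${CC:-cc} -E - 2>/dev/null | grep __ELF__ >/dev/null; then
      echo a.out
    else
      echo ELF
    fi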
+ case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[[89]] | openbsd2.[[89]].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib 
/usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then + sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +fi +if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then + sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" +fi + +_LT_DECL([], [variables_saved_for_relink], [1], + [Variables whose values should be saved in libtool wrapper scripts and + restored at link time]) +_LT_DECL([], [need_lib_prefix], [0], + [Do we need the "lib" prefix for modules?]) +_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) +_LT_DECL([], [version_type], [0], [Library versioning type]) +_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) +_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) +_LT_DECL([], [shlibpath_overrides_runpath], [0], + [Is shlibpath searched before the hard-coded library search path?]) +_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) +_LT_DECL([], [library_names_spec], [1], + [[List of archive names. First name is the real one, the rest are links. 
+ The last name is the one that the linker finds with -lNAME]]) +_LT_DECL([], [soname_spec], [1], + [[The coded name of the library, if different from the real name]]) +_LT_DECL([], [install_override_mode], [1], + [Permission mode override for installation of shared libraries]) +_LT_DECL([], [postinstall_cmds], [2], + [Command to use after installation of a shared archive]) +_LT_DECL([], [postuninstall_cmds], [2], + [Command to use after uninstallation of a shared archive]) +_LT_DECL([], [finish_cmds], [2], + [Commands used to finish a libtool library installation in a directory]) +_LT_DECL([], [finish_eval], [1], + [[As "finish_cmds", except a single script fragment to be evaled but + not shown]]) +_LT_DECL([], [hardcode_into_libs], [0], + [Whether we should hardcode library paths into libraries]) +_LT_DECL([], [sys_lib_search_path_spec], [2], + [Compile-time system search path for libraries]) +_LT_DECL([], [sys_lib_dlsearch_path_spec], [2], + [Run-time system search path for libraries]) +])# _LT_SYS_DYNAMIC_LINKER + + +# _LT_PATH_TOOL_PREFIX(TOOL) +# -------------------------- +# find a file program which can recognize shared library +AC_DEFUN([_LT_PATH_TOOL_PREFIX], +[m4_require([_LT_DECL_EGREP])dnl +AC_MSG_CHECKING([for $1]) +AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, +[case $MAGIC_CMD in +[[\\/*] | ?:[\\/]*]) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="m4_if([$2], , $PATH, [$2])" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$1; then + lt_cv_path_MAGIC_CMD="$ac_dir/$1" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. 
Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac]) +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + AC_MSG_RESULT($MAGIC_CMD) +else + AC_MSG_RESULT(no) +fi +_LT_DECL([], [MAGIC_CMD], [0], + [Used to examine libraries when file_magic_cmd begins with "file"])dnl +])# _LT_PATH_TOOL_PREFIX + +# Old name: +AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) + + +# _LT_PATH_MAGIC +# -------------- +# find a file program which can recognize a shared library +m4_defun([_LT_PATH_MAGIC], +[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) + else + MAGIC_CMD=: + fi +fi +])# _LT_PATH_MAGIC + + +# LT_PATH_LD +# ---------- +# find the pathname to the GNU or non-GNU linker +AC_DEFUN([LT_PATH_LD], +[AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PROG_ECHO_BACKSLASH])dnl + +AC_ARG_WITH([gnu-ld], + [AS_HELP_STRING([--with-gnu-ld], + [assume the C compiler uses GNU ld @<:@default=no@:>@])], + [test "$withval" = no || with_gnu_ld=yes], + [with_gnu_ld=no])dnl + +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + AC_MSG_CHECKING([for ld used by $CC]) + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [[\\/]]* | ?:[[\\/]]*) + re_direlt='/[[^/]][[^/]]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL(lt_cv_path_LD, +[if test -z "$LD"; then + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in + *GNU* | *'with BFD'*) + test "$with_gnu_ld" != no && break + ;; + *) + test "$with_gnu_ld" != yes && break + ;; + esac + fi + done + IFS="$lt_save_ifs" +else + lt_cv_path_LD="$LD" # Let the user override the test with a path. 
+fi]) +LD="$lt_cv_path_LD" +if test -n "$LD"; then + AC_MSG_RESULT($LD) +else + AC_MSG_RESULT(no) +fi +test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH]) +_LT_PATH_LD_GNU +AC_SUBST([LD]) + +_LT_TAGDECL([], [LD], [1], [The linker used to build libraries]) +])# LT_PATH_LD + +# Old names: +AU_ALIAS([AM_PROG_LD], [LT_PATH_LD]) +AU_ALIAS([AC_PROG_LD], [LT_PATH_LD]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_PROG_LD], []) +dnl AC_DEFUN([AC_PROG_LD], []) + + +# _LT_PATH_LD_GNU +#- -------------- +m4_defun([_LT_PATH_LD_GNU], +[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld, +[# I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 </dev/null` in +*GNU* | *'with BFD'*) + lt_cv_prog_gnu_ld=yes + ;; +*) + lt_cv_prog_gnu_ld=no + ;; +esac]) +with_gnu_ld=$lt_cv_prog_gnu_ld +])# _LT_PATH_LD_GNU + + +# _LT_CMD_RELOAD +# -------------- +# find reload flag for linker +# -- PORTME Some linkers may need a different reload flag. +m4_defun([_LT_CMD_RELOAD], +[AC_CACHE_CHECK([for $LD option to reload object files], + lt_cv_ld_reload_flag, + [lt_cv_ld_reload_flag='-r']) +reload_flag=$lt_cv_ld_reload_flag +case $reload_flag in +"" | " "*) ;; +*) reload_flag=" $reload_flag" ;; +esac +reload_cmds='$LD$reload_flag -o $output$reload_objs' +case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + if test "$GCC" != yes; then + reload_cmds=false + fi + ;; + darwin*) + if test "$GCC" = yes; then + reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs' + else + reload_cmds='$LD$reload_flag -o $output$reload_objs' + fi + ;; +esac +_LT_TAGDECL([], [reload_flag], [1], [How to create reloadable object files])dnl +_LT_TAGDECL([], [reload_cmds], [2])dnl +])# _LT_CMD_RELOAD + + +# _LT_CHECK_MAGIC_METHOD +# ---------------------- +# how to check for library dependencies +# -- PORTME fill in with the dynamic library characteristics +m4_defun([_LT_CHECK_MAGIC_METHOD], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +AC_CACHE_CHECK([how to recognize dependent libraries], +lt_cv_deplibs_check_method, +[lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# `unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. +# 'file_magic [[regex]]' -- check by looking for files in library path +# which responds to the $file_magic_cmd with a given extended regex. +# If you have `file' or equivalent on your system and you're not sure +# whether `pass_all' will *always* work, you probably want this one. 
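Of these methods, 'file_magic' is the least obvious: each candidate library found while resolving dependencies is run through $file_magic_cmd and kept only if the output matches the stored extended regex. A hand-run illustration (the command, pattern and library paths below are examples in the style of the entries that follow, not taken from any single one):

    file_magic_cmd='/usr/bin/file -L'
    file_magic_regex='ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
    for lib in /lib/libc.so.* /usr/lib/libc.so.*; do
      $file_magic_cmd "$lib" 2>/dev/null | grep -E "$file_magic_regex" >/dev/null &&
        echo "$lib looks like a recognizable shared library"
    done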
+ +case $host_os in +aix[[4-9]]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[[45]]*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin. + if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. + lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[[3-9]]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
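Worth noting for the Windows entries above: when the file utility is unavailable (bare MSYS/MinGW, or a cegcc cross build), the weaker fallback simply asks objdump for the BFD target name and pattern-matches that. Roughly, with foo.dll as a placeholder:

    # Requires binutils objdump; prints e.g. "pei-i386" or "pe-x86-64".
    objdump -f foo.dll 2>/dev/null | sed -n 's/.*file format //p'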
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +esac +]) + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +_LT_DECL([], [deplibs_check_method], [1], + [Method to check whether dependent libraries are shared objects]) +_LT_DECL([], [file_magic_cmd], [1], + [Command to use when deplibs_check_method = "file_magic"]) +_LT_DECL([], [file_magic_glob], [1], + [How to find potential files when deplibs_check_method = "file_magic"]) +_LT_DECL([], [want_nocaseglob], [1], + [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) +])# _LT_CHECK_MAGIC_METHOD + + +# LT_PATH_NM +# ---------- +# find the pathname to a BSD- or MS-compatible name lister +AC_DEFUN([LT_PATH_NM], +[AC_REQUIRE([AC_PROG_CC])dnl +AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the test. 
+ lt_cv_path_NM="$NM" +else + lt_nm_to_check="${ac_tool_prefix}nm" + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + tmp_nm="$ac_dir/$lt_tmp_nm" + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then + # Check to see if the nm accepts a BSD-compat flag. + # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS="$lt_save_ifs" + done + : ${lt_cv_path_NM=no} +fi]) +if test "$lt_cv_path_NM" != "no"; then + NM="$lt_cv_path_NM" +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. + else + AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) + case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols" + ;; + *) + DUMPBIN=: + ;; + esac + fi + AC_SUBST([DUMPBIN]) + if test "$DUMPBIN" != ":"; then + NM="$DUMPBIN" + fi +fi +test -z "$NM" && NM=nm +AC_SUBST([NM]) +_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl + +AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], + [lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) + cat conftest.out >&AS_MESSAGE_LOG_FD + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest*]) +])# LT_PATH_NM + +# Old names: +AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) +AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_PROG_NM], []) +dnl AC_DEFUN([AC_PROG_NM], []) + +# _LT_CHECK_SHAREDLIB_FROM_LINKLIB +# -------------------------------- +# how to determine the name of the shared library +# associated with a specific link library. 
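On Windows hosts the macro defined below chooses between two ltmain.sh helpers according to whether $DLLTOOL understands --identify-strict; the underlying query is simply the following (libfoo.dll.a is a placeholder import library, binutils dlltool assumed):

    dlltool --identify libfoo.dll.a          # print the DLL name recorded in the import library
    dlltool --identify-strict libfoo.dll.a   # same, but fail if more than one DLL is referenced

On every other host the link library and the shared library are the same file, so the command degenerates to a plain echo.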
+# -- PORTME fill in with the dynamic library characteristics +m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +m4_require([_LT_DECL_DLLTOOL]) +AC_CACHE_CHECK([how to associate runtime and link libraries], +lt_cv_sharedlib_from_linklib_cmd, +[lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh + # decide which to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd="$ECHO" + ;; +esac +]) +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + +_LT_DECL([], [sharedlib_from_linklib_cmd], [1], + [Command to associate shared and link libraries]) +])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB + + +# _LT_PATH_MANIFEST_TOOL +# ---------------------- +# locate the manifest tool +m4_defun([_LT_PATH_MANIFEST_TOOL], +[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], + [lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD + $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out + cat conftest.err >&AS_MESSAGE_LOG_FD + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest*]) +if test "x$lt_cv_path_mainfest_tool" != xyes; then + MANIFEST_TOOL=: +fi +_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl +])# _LT_PATH_MANIFEST_TOOL + + +# LT_LIB_M +# -------- +# check for math library +AC_DEFUN([LT_LIB_M], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These system don't have libm, or don't need it + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw") + AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, cos, LIBM="-lm") + ;; +esac +AC_SUBST([LIBM]) +])# LT_LIB_M + +# Old name: +AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_CHECK_LIBM], []) + + +# _LT_COMPILER_NO_RTTI([TAGNAME]) +# ------------------------------- +m4_defun([_LT_COMPILER_NO_RTTI], +[m4_require([_LT_TAG_COMPILER])dnl + +_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + +if test "$GCC" = yes; then + case $cc_basename in + nvcc*) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; + *) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; + esac + + _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], + lt_cv_prog_compiler_rtti_exceptions, + [-fno-rtti -fno-exceptions], [], + [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) +fi +_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], + [Compiler flag to turn off builtin functions]) +])# _LT_COMPILER_NO_RTTI + + +# _LT_CMD_GLOBAL_SYMBOLS +# ---------------------- +m4_defun([_LT_CMD_GLOBAL_SYMBOLS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([LT_PATH_NM])dnl 
+AC_REQUIRE([LT_PATH_LD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_TAG_COMPILER])dnl + +# Check for command to grab the raw symbol name followed by C symbol from nm. +AC_MSG_CHECKING([command to parse $NM output from $compiler object]) +AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], +[ +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[[BCDEGRST]]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[[BCDT]]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[[ABCDGISTW]]' + ;; +hpux*) + if test "$host_cpu" = ia64; then + symcode='[[ABCDEGRST]]' + fi + ;; +irix* | nonstopux*) + symcode='[[BCDEGRST]]' + ;; +osf*) + symcode='[[BCDEGQRST]]' + ;; +solaris*) + symcode='[[BDRT]]' + ;; +sco3.2v5*) + symcode='[[DT]]' + ;; +sysv4.2uw2*) + symcode='[[DT]]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[[ABDT]]' + ;; +sysv4) + symcode='[[DFNSTU]]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[[ABCDGIRSTW]]' ;; +esac + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p'" +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"lib\2\", (void *) \&\2},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function + # and D for any global variable. + # Also find C++ and __fastcall symbols from MSVC++, + # which start with @ or ?. + lt_cv_sys_global_symbol_pipe="$AWK ['"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? 
\"T \" : \"D \"};"\ +" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ +" s[1]~/^[@?]/{print s[1], s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx]" + else + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if AC_TRY_EVAL(ac_compile); then + # Now try to grab the symbols. + nlist=conftest.nm + if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT@&t@_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT@&t@_DLSYM_CONST +#else +# define LT@&t@_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT@&t@_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[[]] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD + fi + else + echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. 
+ if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi + +# Response file support. +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + +_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], + [Take the output of nm and produce a listing of raw symbols and C names]) +_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], + [Transform the output of nm in a proper C declaration]) +_LT_DECL([global_symbol_to_c_name_address], + [lt_cv_sys_global_symbol_to_c_name_address], [1], + [Transform the output of nm in a C name address pair]) +_LT_DECL([global_symbol_to_c_name_address_lib_prefix], + [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], + [Transform the output of nm in a C name address pair when lib prefix is needed]) +_LT_DECL([], [nm_file_list_spec], [1], + [Specify filename containing input files for $NM]) +]) # _LT_CMD_GLOBAL_SYMBOLS + + +# _LT_COMPILER_PIC([TAGNAME]) +# --------------------------- +m4_defun([_LT_COMPILER_PIC], +[m4_require([_LT_TAG_COMPILER])dnl +_LT_TAGVAR(lt_prog_compiler_wl, $1)= +_LT_TAGVAR(lt_prog_compiler_pic, $1)= +_LT_TAGVAR(lt_prog_compiler_static, $1)= + +m4_if([$1], [CXX], [ + # C++ specific cases for pic, static, wl, etc. + if test "$GXX" = yes; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. 
+ ;; + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + case $host_os in + aix[[4-9]]*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + dgux*) + case $cc_basename in + ec++*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + if test "$host_cpu" != ia64; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + fi + ;; + aCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # KAI C++ Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64 which still supported -KPIC. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. 
It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd* | netbsdelf*-gnu) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + cxx*) + # Digital/Compaq C++ + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + lcc*) + # Lucid + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +], +[ + if test "$GCC" = yes; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. 
+ ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' + if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + + hpux9* | hpux10* | hpux11*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC (with -KPIC) is the default. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64 which still supported -KPIC. 
+ ecc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' + _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' + ;; + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + ccc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All Alpha code is PIC. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='' + ;; + *Sun\ F* | *Sun*Fortran*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + *Intel*\ [[CF]]*Compiler*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + *Portland\ Group*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All OSF/1 code is PIC. 
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + rdos*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + solaris*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; + *) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; + esac + ;; + + sunos4*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + unicos*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + + uts4*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +]) +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" + ;; +esac + +AC_CACHE_CHECK([for $compiler option to produce PIC], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) +_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], + [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], + [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], + [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in + "" | " "*) ;; + *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; + esac], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) +fi +_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], + [Additional compiler flags for building library objects]) + +_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], + [How to pass a linker flag through the compiler]) +# +# Check to make sure the static flag actually works. +# +wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" +_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], + _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), + $lt_tmp_static_flag, + [], + [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) +_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], + [Compiler flag to prevent dynamic linking]) +])# _LT_COMPILER_PIC + + +# _LT_LINKER_SHLIBS([TAGNAME]) +# ---------------------------- +# See if the linker supports building shared libraries. 
+m4_defun([_LT_LINKER_SHLIBS], +[AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +m4_if([$1], [CXX], [ + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + case $host_os in + aix[[4-9]]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global defined + # symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl*) + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + ;; + esac + ;; + linux* | k*bsd*-gnu | gnu*) + _LT_TAGVAR(link_all_deplibs, $1)=no + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac +], [ + runpath_var= + _LT_TAGVAR(allow_undefined_flag, $1)= + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(archive_cmds, $1)= + _LT_TAGVAR(archive_expsym_cmds, $1)= + _LT_TAGVAR(compiler_needs_object, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(hardcode_automatic, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(hardcode_libdir_separator, $1)= + _LT_TAGVAR(hardcode_minus_L, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_TAGVAR(inherit_rpath, $1)=no + _LT_TAGVAR(link_all_deplibs, $1)=unknown + _LT_TAGVAR(module_cmds, $1)= + _LT_TAGVAR(module_expsym_cmds, $1)= + _LT_TAGVAR(old_archive_from_new_cmds, $1)= + _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= + _LT_TAGVAR(thread_safe_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + _LT_TAGVAR(include_expsyms, $1)= + # 
exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ` (' and `)$', so one must not match beginning or + # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', + # as well as any symbol that contains `d'. + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. +dnl Note also adjust exclude_expsyms for C++ above. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + linux* | k*bsd*-gnu | gnu*) + _LT_TAGVAR(link_all_deplibs, $1)=no + ;; + esac + + _LT_TAGVAR(ld_shlibs, $1)=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test "$with_gnu_ld" = yes; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; + *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test "$lt_use_gnu_ld_interface" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + supports_anon_versioning=no + case `$LD -v 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[[3-9]]*) + # On AIX/PPC, the GNU linker is very broken + if test "$host_cpu" != ia64; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. 
+*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. + +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach <jrb3@best.com> says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... 
+ _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test "$host_os" = linux-dietlibc; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test "$tmp_diet" = no + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + _LT_TAGVAR(whole_archive_flag_spec, $1)= + tmp_sharedflag='--shared' ;; + xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && 
new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + sunos4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + + if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then + runpath_var= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + _LT_TAGVAR(hardcode_direct, $1)=unsupported + fi + ;; + + aix[[4-9]]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. 
+ aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global + # defined symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' + + if test "$GCC" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + _LT_TAGVAR(link_all_deplibs, $1)=no + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. 
+ _LT_TAGVAR(always_export_symbols, $1)=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + # This is similar to how AIX traditionally builds its shared libraries. + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + bsdi[[45]]*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. 
+ case $cc_basename in + cl*) + # Native MSVC + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + # FIXME: Should let the user specify the lib program. + _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + esac + ;; + + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). 
+ freebsd2.2*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + hpux9*) + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + ;; + + hpux10*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + fi + ;; + + hpux11*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + m4_if($1, [], [ + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + _LT_LINKER_OPTION([if $CC understands -b], + _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], + [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) + ;; + esac + fi + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. 
+ AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], + [lt_cv_irix_exported_symbol], + [save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" + AC_LINK_IFELSE( + [AC_LANG_SOURCE( + [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], + [C++], [[int foo (void) { return 0; }]], + [Fortran 77], [[ + subroutine foo + end]], + [Fortran], [[ + subroutine foo + end]])])], + [lt_cv_irix_exported_symbol=yes], + [lt_cv_irix_exported_symbol=no]) + LDFLAGS="$save_LDFLAGS"]) + if test "$lt_cv_irix_exported_symbol" = yes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' + fi + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + newsos6) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + else + case $host_os in + openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + ;; + esac + fi + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + os2*) + 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + solaris*) + _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' + if test "$GCC" = yes; then + wlarc='${wl}' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs 
$compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='${wl}' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. GCC discards it without `$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test "$GCC" = yes; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + fi + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4) + case $host_vendor in + sni) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. 
+ _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' + _LT_TAGVAR(hardcode_direct, $1)=no + ;; + motorola) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4.3*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + _LT_TAGVAR(ld_shlibs, $1)=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + if test x$host_vendor = xsni; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym' + ;; + esac + fi + fi +]) +AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) +test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no + +_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld + +_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl +_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl +_LT_DECL([], [extract_expsyms_cmds], [2], + [The commands to extract the exported symbol list from a shared archive]) + +# +# Do we need to explicitly link libc? +# +case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in +x|xyes) + # Assume -lc should be added + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $_LT_TAGVAR(archive_cmds, $1) in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_CACHE_CHECK([whether -lc should be explicitly linked in], + [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), + [$RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile) 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) + pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. 
+ libname=conftest + lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) + _LT_TAGVAR(allow_undefined_flag, $1)= + if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) + then + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no + else + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes + fi + _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + ]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) + ;; + esac + fi + ;; +esac + +_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], + [Whether or not to add -lc for building shared libraries]) +_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], + [enable_shared_with_static_runtimes], [0], + [Whether or not to disallow shared libs when runtime libs are static]) +_LT_TAGDECL([], [export_dynamic_flag_spec], [1], + [Compiler flag to allow reflexive dlopens]) +_LT_TAGDECL([], [whole_archive_flag_spec], [1], + [Compiler flag to generate shared objects directly from archives]) +_LT_TAGDECL([], [compiler_needs_object], [1], + [Whether the compiler copes with passing no objects directly]) +_LT_TAGDECL([], [old_archive_from_new_cmds], [2], + [Create an old-style archive from a shared archive]) +_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], + [Create a temporary old-style archive to link instead of a shared archive]) +_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) +_LT_TAGDECL([], [archive_expsym_cmds], [2]) +_LT_TAGDECL([], [module_cmds], [2], + [Commands used to build a loadable module if different from building + a shared archive.]) +_LT_TAGDECL([], [module_expsym_cmds], [2]) +_LT_TAGDECL([], [with_gnu_ld], [1], + [Whether we are building with GNU ld or not]) +_LT_TAGDECL([], [allow_undefined_flag], [1], + [Flag that allows shared libraries with undefined symbols to be built]) +_LT_TAGDECL([], [no_undefined_flag], [1], + [Flag that enforces no undefined symbols]) +_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], + [Flag to hardcode $libdir into a binary during linking. 
+ This must work even if $libdir does not exist]) +_LT_TAGDECL([], [hardcode_libdir_separator], [1], + [Whether we need a single "-rpath" flag with a separated argument]) +_LT_TAGDECL([], [hardcode_direct], [0], + [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes + DIR into the resulting binary]) +_LT_TAGDECL([], [hardcode_direct_absolute], [0], + [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes + DIR into the resulting binary and the resulting library dependency is + "absolute", i.e impossible to change by setting ${shlibpath_var} if the + library is relocated]) +_LT_TAGDECL([], [hardcode_minus_L], [0], + [Set to "yes" if using the -LDIR flag during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_shlibpath_var], [0], + [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_automatic], [0], + [Set to "yes" if building a shared library automatically hardcodes DIR + into the library and all subsequent libraries and executables linked + against it]) +_LT_TAGDECL([], [inherit_rpath], [0], + [Set to yes if linker adds runtime paths of dependent libraries + to runtime path list]) +_LT_TAGDECL([], [link_all_deplibs], [0], + [Whether libtool must link a program against all its dependency libraries]) +_LT_TAGDECL([], [always_export_symbols], [0], + [Set to "yes" if exported symbols are required]) +_LT_TAGDECL([], [export_symbols_cmds], [2], + [The commands to list exported symbols]) +_LT_TAGDECL([], [exclude_expsyms], [1], + [Symbols that should not be listed in the preloaded symbols]) +_LT_TAGDECL([], [include_expsyms], [1], + [Symbols that must always be exported]) +_LT_TAGDECL([], [prelink_cmds], [2], + [Commands necessary for linking programs (against libraries) with templates]) +_LT_TAGDECL([], [postlink_cmds], [2], + [Commands necessary for finishing linking programs]) +_LT_TAGDECL([], [file_list_spec], [1], + [Specify filename containing input files]) +dnl FIXME: Not yet implemented +dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], +dnl [Compiler flag to generate thread safe objects]) +])# _LT_LINKER_SHLIBS + + +# _LT_LANG_C_CONFIG([TAG]) +# ------------------------ +# Ensure that the configuration variables for a C compiler are suitably +# defined. These variables are subsequently used by _LT_CONFIG to write +# the compiler configuration to `libtool'. +m4_defun([_LT_LANG_C_CONFIG], +[m4_require([_LT_DECL_EGREP])dnl +lt_save_CC="$CC" +AC_LANG_PUSH(C) + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + +_LT_TAG_COMPILER +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. 
+compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + LT_SYS_DLOPEN_SELF + _LT_CMD_STRIPLIB + + # Report which library types will actually be built + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_CONFIG($1) +fi +AC_LANG_POP +CC="$lt_save_CC" +])# _LT_LANG_C_CONFIG + + +# _LT_LANG_CXX_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a C++ compiler are suitably +# defined. These variables are subsequently used by _LT_CONFIG to write +# the compiler configuration to `libtool'. +m4_defun([_LT_LANG_CXX_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +if test -n "$CXX" && ( test "X$CXX" != "Xno" && + ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || + (test "X$CXX" != "Xg++"))) ; then + AC_PROG_CXXCPP +else + _lt_caught_CXX_error=yes +fi + +AC_LANG_PUSH(C++) +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(compiler_needs_object, $1)=no +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test "$_lt_caught_CXX_error" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test "$GXX" = yes; then + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' + else + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + fi + + if test "$GXX" = yes; then + # Set up default GNU C++ configuration + + LT_PATH_LD + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. + if test "$with_gnu_ld" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='${wl}' + + # ancient GNU ld didn't support --whole-archive et. al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # overridden to take advantage of the native linker features on + # the platform it is being used on. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) + _LT_TAGVAR(ld_shlibs, $1)=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aix[[4-9]]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' + + if test "$GXX" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an empty + # executable. 
+ _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + # This is similar to how AIX traditionally builds its shared + # libraries. + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach <jrb3@best.com> says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl*) + # Native MSVC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... 
+ _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + freebsd2.*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + freebsd-elf*) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + ;; + + freebsd* | dragonfly*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + hpux9*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test $with_gnu_ld = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes; then + if test $with_gnu_ld = no; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test "$GXX" = yes; then + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib' + fi + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + esac + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) + _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + ;; + cxx*) + # Compaq C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname 
$wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + openbsd2*) + # C++ shared libraries are fairly broken + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. 
+ case $host in + osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + case $host in + osf3*) + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + ;; + *) + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~ + $RM $lib.exp' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + case $host in + osf3*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(archive_cmds_need_lc,$1)=yes + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + else + # g++ 2.7 appears to require `-G' NOT `-shared' on this + # platform. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + fi + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ + '"$_LT_TAGVAR(old_archive_cmds, $1)" + _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ + '"$_LT_TAGVAR(reload_cmds, $1)" + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) + test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no + + _LT_TAGVAR(GCC, $1)="$GXX" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test "$_lt_caught_CXX_error" != yes + +AC_LANG_POP +])# _LT_LANG_CXX_CONFIG + + +# _LT_FUNC_STRIPNAME_CNF +# ---------------------- +# func_stripname_cnf prefix suffix name +# strip PREFIX and SUFFIX off of NAME. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# +# This function is identical to the (non-XSI) version of func_stripname, +# except this one can be used by m4 code that may be executed by configure, +# rather than the libtool script. 
+m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl +AC_REQUIRE([_LT_DECL_SED]) +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) +func_stripname_cnf () +{ + case ${2} in + .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; + *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; + esac +} # func_stripname_cnf +])# _LT_FUNC_STRIPNAME_CNF + +# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) +# --------------------------------- +# Figure out "hidden" library dependencies from verbose +# compiler output when linking a shared library. +# Parse the compiler output and extract the necessary +# objects, libraries and library flags. +m4_defun([_LT_SYS_HIDDEN_LIBDEPS], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl +# Dependencies to place before and after the object being linked: +_LT_TAGVAR(predep_objects, $1)= +_LT_TAGVAR(postdep_objects, $1)= +_LT_TAGVAR(predeps, $1)= +_LT_TAGVAR(postdeps, $1)= +_LT_TAGVAR(compiler_lib_search_path, $1)= + +dnl we can't use the lt_simple_compile_test_code here, +dnl because it contains code intended for an executable, +dnl not a library. It's possible we should let each +dnl tag define a new lt_????_link_test_code variable, +dnl but it's only used here... +m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF +int a; +void foo (void) { a = 0; } +_LT_EOF +], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF +class Foo +{ +public: + Foo (void) { a = 0; } +private: + int a; +}; +_LT_EOF +], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer*4 a + a=0 + return + end +_LT_EOF +], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer a + a=0 + return + end +_LT_EOF +], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF +public class foo { + private int a; + public void bar (void) { + a = 0; + } +}; +_LT_EOF +], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF +package foo +func foo() { +} +_LT_EOF +]) + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; +esac + +dnl Parse the compiler output and extract the necessary +dnl objects, libraries and library flags. +if AC_TRY_EVAL(ac_compile); then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file. + pre_test_object_deps_done=no + + for p in `eval "$output_verbose_link_cmd"`; do + case ${prev}${p} in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R}" and the path. + # Remove the space. + if test $p = "-L" || + test $p = "-R"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test "$pre_test_object_deps_done" = no; then + case ${prev} in + -L | -R) + # Internal compiler library paths should come after those + # provided the user. The postdeps already come after the + # user supplied libs so there is no need to process them. 
+ if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then + _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}" + else + _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$_LT_TAGVAR(postdeps, $1)"; then + _LT_TAGVAR(postdeps, $1)="${prev}${p}" + else + _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output. + if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test "$pre_test_object_deps_done" = no; then + if test -z "$_LT_TAGVAR(predep_objects, $1)"; then + _LT_TAGVAR(predep_objects, $1)="$p" + else + _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" + fi + else + if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then + _LT_TAGVAR(postdep_objects, $1)="$p" + else + _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling $1 test program" +fi + +$RM -f confest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken +m4_if([$1], [CXX], +[case $host_os in +interix[[3-9]]*) + # Interix 3.5 installs completely hosed .la files for C++, so rather than + # hack all around it, let's just trust "g++" to DTRT. + _LT_TAGVAR(predep_objects,$1)= + _LT_TAGVAR(postdep_objects,$1)= + _LT_TAGVAR(postdeps,$1)= + ;; + +linux*) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. + case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + + if test "$solaris_use_stlport4" != yes; then + _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' + fi + ;; + esac + ;; + +solaris*) + case $cc_basename in + CC* | sunCC*) + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. + case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + + # Adding this requires a known-good setup of shared libraries for + # Sun compiler versions before 5.6, else PIC objects from an old + # archive will be linked into the output, leading to subtle bugs. + if test "$solaris_use_stlport4" != yes; then + _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' + fi + ;; + esac + ;; +esac +]) + +case " $_LT_TAGVAR(postdeps, $1) " in +*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; +esac + _LT_TAGVAR(compiler_lib_search_dirs, $1)= +if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then + _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! 
!g' -e 's!^ !!'` +fi +_LT_TAGDECL([], [compiler_lib_search_dirs], [1], + [The directories searched by this compiler when creating a shared library]) +_LT_TAGDECL([], [predep_objects], [1], + [Dependencies to place before and after the objects being linked to + create a shared library]) +_LT_TAGDECL([], [postdep_objects], [1]) +_LT_TAGDECL([], [predeps], [1]) +_LT_TAGDECL([], [postdeps], [1]) +_LT_TAGDECL([], [compiler_lib_search_path], [1], + [The library search path used internally by the compiler when linking + a shared library]) +])# _LT_SYS_HIDDEN_LIBDEPS + + +# _LT_LANG_F77_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a Fortran 77 compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_F77_CONFIG], +[AC_LANG_PUSH(Fortran 77) +if test -z "$F77" || test "X$F77" = "Xno"; then + _lt_disable_F77=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test "$_lt_disable_F77" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC="$CC" + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + GCC=$G77 + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)="$G77" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC="$lt_save_CC" + CFLAGS="$lt_save_CFLAGS" +fi # test "$_lt_disable_F77" != yes + +AC_LANG_POP +])# _LT_LANG_F77_CONFIG + + +# _LT_LANG_FC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for a Fortran compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_FC_CONFIG], +[AC_LANG_PUSH(Fortran) + +if test -z "$FC" || test "X$FC" = "Xno"; then + _lt_disable_FC=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for fc test sources. +ac_ext=${ac_fc_srcext-f} + +# Object file extension for compiled fc test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the FC compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test "$_lt_disable_FC" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. 
+ lt_save_CC="$CC" + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${FC-"f95"} + CFLAGS=$FCFLAGS + compiler=$CC + GCC=$ac_cv_fc_compiler_gnu + + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test "$_lt_disable_FC" != yes + +AC_LANG_POP +])# _LT_LANG_FC_CONFIG + + +# _LT_LANG_GCJ_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Java Compiler compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_GCJ_CONFIG], +[AC_REQUIRE([LT_PROG_GCJ])dnl +AC_LANG_SAVE + +# Source file extension for Java test sources. +ac_ext=java + +# Object file extension for compiled Java test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="class foo {}" + +# Code to be used in simple link tests +lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GCJ-"gcj"} +CFLAGS=$GCJFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)="$LD" +_LT_CC_BASENAME([$compiler]) + +# GCJ did not exist at the time GCC didn't implicitly link libc in. 
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GCJ_CONFIG + + +# _LT_LANG_GO_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Go compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_GO_CONFIG], +[AC_REQUIRE([LT_PROG_GO])dnl +AC_LANG_SAVE + +# Source file extension for Go test sources. +ac_ext=go + +# Object file extension for compiled Go test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="package main; func main() { }" + +# Code to be used in simple link tests +lt_simple_link_test_code='package main; func main() { }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GOC-"gccgo"} +CFLAGS=$GOFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)="$LD" +_LT_CC_BASENAME([$compiler]) + +# Go did not exist at the time GCC didn't implicitly link libc in. +_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GO_CONFIG + + +# _LT_LANG_RC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the Windows resource compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_RC_CONFIG], +[AC_REQUIRE([LT_PROG_RC])dnl +AC_LANG_SAVE + +# Source file extension for RC test sources. +ac_ext=rc + +# Object file extension for compiled RC test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' + +# Code to be used in simple link tests +lt_simple_link_test_code="$lt_simple_compile_test_code" + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. 
+lt_save_CC="$CC" +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC= +CC=${RC-"windres"} +CFLAGS= +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_CC_BASENAME([$compiler]) +_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + +if test -n "$compiler"; then + : + _LT_CONFIG($1) +fi + +GCC=$lt_save_GCC +AC_LANG_RESTORE +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_RC_CONFIG + + +# LT_PROG_GCJ +# ----------- +AC_DEFUN([LT_PROG_GCJ], +[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], + [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], + [AC_CHECK_TOOL(GCJ, gcj,) + test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2" + AC_SUBST(GCJFLAGS)])])[]dnl +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_GCJ], []) + + +# LT_PROG_GO +# ---------- +AC_DEFUN([LT_PROG_GO], +[AC_CHECK_TOOL(GOC, gccgo,) +]) + + +# LT_PROG_RC +# ---------- +AC_DEFUN([LT_PROG_RC], +[AC_CHECK_TOOL(RC, windres,) +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_RC], []) + + +# _LT_DECL_EGREP +# -------------- +# If we don't have a new enough Autoconf to choose the best grep +# available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_EGREP], +[AC_REQUIRE([AC_PROG_EGREP])dnl +AC_REQUIRE([AC_PROG_FGREP])dnl +test -z "$GREP" && GREP=grep +_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) +_LT_DECL([], [EGREP], [1], [An ERE matcher]) +_LT_DECL([], [FGREP], [1], [A literal string matcher]) +dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too +AC_SUBST([GREP]) +]) + + +# _LT_DECL_OBJDUMP +# -------------- +# If we don't have a new enough Autoconf to choose the best objdump +# available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_OBJDUMP], +[AC_CHECK_TOOL(OBJDUMP, objdump, false) +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) +AC_SUBST([OBJDUMP]) +]) + +# _LT_DECL_DLLTOOL +# ---------------- +# Ensure DLLTOOL variable is set. +m4_defun([_LT_DECL_DLLTOOL], +[AC_CHECK_TOOL(DLLTOOL, dlltool, false) +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) +AC_SUBST([DLLTOOL]) +]) + +# _LT_DECL_SED +# ------------ +# Check for a fully-functional sed program, that truncates +# as few characters as possible. Prefer GNU sed if found. +m4_defun([_LT_DECL_SED], +[AC_PROG_SED +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" +_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) +_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], + [Sed that helps us avoid accidentally triggering echo(1) options like -n]) +])# _LT_DECL_SED + +m4_ifndef([AC_PROG_SED], [ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_SED. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. # + +m4_defun([AC_PROG_SED], +[AC_MSG_CHECKING([for a sed that does not truncate output]) +AC_CACHE_VAL(lt_cv_path_SED, +[# Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for lt_ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then + lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" + fi + done + done +done +IFS=$as_save_IFS +lt_ac_max=0 +lt_ac_count=0 +# Add /usr/xpg4/bin/sed as it is typically found on Solaris +# along with /bin/sed that truncates output. +for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do + test ! -f $lt_ac_sed && continue + cat /dev/null > conftest.in + lt_ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >conftest.in + # Check for GNU sed and select it if it is found. + if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then + lt_cv_path_SED=$lt_ac_sed + break + fi + while true; do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo >>conftest.nl + $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break + cmp -s conftest.out conftest.nl || break + # 10000 chars as input seems more than enough + test $lt_ac_count -gt 10 && break + lt_ac_count=`expr $lt_ac_count + 1` + if test $lt_ac_count -gt $lt_ac_max; then + lt_ac_max=$lt_ac_count + lt_cv_path_SED=$lt_ac_sed + fi + done +done +]) +SED=$lt_cv_path_SED +AC_SUBST([SED]) +AC_MSG_RESULT([$SED]) +])#AC_PROG_SED +])#m4_ifndef + +# Old name: +AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_SED], []) + + +# _LT_CHECK_SHELL_FEATURES +# ------------------------ +# Find out whether the shell is Bourne or XSI compatible, +# or has some other useful features. +m4_defun([_LT_CHECK_SHELL_FEATURES], +[AC_MSG_CHECKING([whether the shell understands some XSI constructs]) +# Try some XSI features +xsi_shell=no +( _lt_dummy="a/b/c" + test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ + = c,a/b,b/c, \ + && eval 'test $(( 1 + 1 )) -eq 2 \ + && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ + && xsi_shell=yes +AC_MSG_RESULT([$xsi_shell]) +_LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell']) + +AC_MSG_CHECKING([whether the shell understands "+="]) +lt_shell_append=no +( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \ + >/dev/null 2>&1 \ + && lt_shell_append=yes +AC_MSG_RESULT([$lt_shell_append]) +_LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append']) + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi +_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac +_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl +_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl +])# _LT_CHECK_SHELL_FEATURES + + +# _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY) +# ------------------------------------------------------ +# In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and +# '^} FUNCNAME ', and replace its body with REPLACEMENT-BODY. 
+m4_defun([_LT_PROG_FUNCTION_REPLACE], +[dnl { +sed -e '/^$1 ()$/,/^} # $1 /c\ +$1 ()\ +{\ +m4_bpatsubsts([$2], [$], [\\], [^\([ ]\)], [\\\1]) +} # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: +]) + + +# _LT_PROG_REPLACE_SHELLFNS +# ------------------------- +# Replace existing portable implementations of several shell functions with +# equivalent extended shell implementations where those features are available.. +m4_defun([_LT_PROG_REPLACE_SHELLFNS], +[if test x"$xsi_shell" = xyes; then + _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl + case ${1} in + */*) func_dirname_result="${1%/*}${2}" ;; + * ) func_dirname_result="${3}" ;; + esac]) + + _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl + func_basename_result="${1##*/}"]) + + _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl + case ${1} in + */*) func_dirname_result="${1%/*}${2}" ;; + * ) func_dirname_result="${3}" ;; + esac + func_basename_result="${1##*/}"]) + + _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl + # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are + # positional parameters, so assign one to ordinary parameter first. + func_stripname_result=${3} + func_stripname_result=${func_stripname_result#"${1}"} + func_stripname_result=${func_stripname_result%"${2}"}]) + + _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl + func_split_long_opt_name=${1%%=*} + func_split_long_opt_arg=${1#*=}]) + + _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl + func_split_short_opt_arg=${1#??} + func_split_short_opt_name=${1%"$func_split_short_opt_arg"}]) + + _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl + case ${1} in + *.lo) func_lo2o_result=${1%.lo}.${objext} ;; + *) func_lo2o_result=${1} ;; + esac]) + + _LT_PROG_FUNCTION_REPLACE([func_xform], [ func_xform_result=${1%.*}.lo]) + + _LT_PROG_FUNCTION_REPLACE([func_arith], [ func_arith_result=$(( $[*] ))]) + + _LT_PROG_FUNCTION_REPLACE([func_len], [ func_len_result=${#1}]) +fi + +if test x"$lt_shell_append" = xyes; then + _LT_PROG_FUNCTION_REPLACE([func_append], [ eval "${1}+=\\${2}"]) + + _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl + func_quote_for_eval "${2}" +dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \ + eval "${1}+=\\\\ \\$func_quote_for_eval_result"]) + + # Save a `func_append' function call where possible by direct use of '+=' + sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +else + # Save a `func_append' function call even when '+=' is not available + sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +fi + +if test x"$_lt_function_replace_fail" = x":"; then + AC_MSG_WARN([Unable to substitute extended shell functions in $ofile]) +fi +]) + +# _LT_PATH_CONVERSION_FUNCTIONS +# ----------------------------- +# Determine which file name conversion functions should be used by +# func_to_host_file (and, implicitly, by func_to_host_path). These are needed +# for certain cross-compile configurations and native mingw. 
+m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_MSG_CHECKING([how to convert $build file names to $host format]) +AC_CACHE_VAL(lt_cv_to_host_file_cmd, +[case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac +]) +to_host_file_cmd=$lt_cv_to_host_file_cmd +AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) +_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], + [0], [convert $build file names to $host format])dnl + +AC_MSG_CHECKING([how to convert $build file names to toolchain format]) +AC_CACHE_VAL(lt_cv_to_tool_file_cmd, +[#assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac +]) +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) +_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], + [0], [convert $build files to toolchain format])dnl +])# _LT_PATH_CONVERSION_FUNCTIONS + +# Helper functions for option handling. -*- Autoconf -*- +# +# Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 7 ltoptions.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) + + +# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) +# ------------------------------------------ +m4_define([_LT_MANGLE_OPTION], +[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) + + +# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) +# --------------------------------------- +# Set option OPTION-NAME for macro MACRO-NAME, and if there is a +# matching handler defined, dispatch to it. Other OPTION-NAMEs are +# saved as a flag. +m4_define([_LT_SET_OPTION], +[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl +m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), + _LT_MANGLE_DEFUN([$1], [$2]), + [m4_warning([Unknown $1 option `$2'])])[]dnl +]) + + +# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) +# ------------------------------------------------------------ +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +m4_define([_LT_IF_OPTION], +[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) + + +# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) +# ------------------------------------------------------- +# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME +# are set. 
+m4_define([_LT_UNLESS_OPTIONS], +[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), + [m4_define([$0_found])])])[]dnl +m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 +])[]dnl +]) + + +# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) +# ---------------------------------------- +# OPTION-LIST is a space-separated list of Libtool options associated +# with MACRO-NAME. If any OPTION has a matching handler declared with +# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about +# the unknown option and exit. +m4_defun([_LT_SET_OPTIONS], +[# Set options +m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [_LT_SET_OPTION([$1], _LT_Option)]) + +m4_if([$1],[LT_INIT],[ + dnl + dnl Simply set some default values (i.e off) if boolean options were not + dnl specified: + _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no + ]) + _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no + ]) + dnl + dnl If no reference was made to various pairs of opposing options, then + dnl we run the default mode handler for the pair. For example, if neither + dnl `shared' nor `disable-shared' was passed, we enable building of shared + dnl archives by default: + _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) + _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], + [_LT_ENABLE_FAST_INSTALL]) + ]) +])# _LT_SET_OPTIONS + + + +# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) +# ----------------------------------------- +m4_define([_LT_MANGLE_DEFUN], +[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) + + +# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) +# ----------------------------------------------- +m4_define([LT_OPTION_DEFINE], +[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl +])# LT_OPTION_DEFINE + + +# dlopen +# ------ +LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes +]) + +AU_DEFUN([AC_LIBTOOL_DLOPEN], +[_LT_SET_OPTION([LT_INIT], [dlopen]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `dlopen' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) + + +# win32-dll +# --------- +# Declare package support for building win32 dll's. +LT_OPTION_DEFINE([LT_INIT], [win32-dll], +[enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + ;; +esac + +test -z "$AS" && AS=as +_LT_DECL([], [AS], [1], [Assembler program])dnl + +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl + +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl +])# win32-dll + +AU_DEFUN([AC_LIBTOOL_WIN32_DLL], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +_LT_SET_OPTION([LT_INIT], [win32-dll]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `win32-dll' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) + + +# _LT_ENABLE_SHARED([DEFAULT]) +# ---------------------------- +# implement the --enable-shared flag, and supports the `shared' and +# `disable-shared' LT_INIT options. 
+# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. +m4_define([_LT_ENABLE_SHARED], +[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], + [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) + + _LT_DECL([build_libtool_libs], [enable_shared], [0], + [Whether or not to build shared libraries]) +])# _LT_ENABLE_SHARED + +LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) +]) + +AC_DEFUN([AC_DISABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], [disable-shared]) +]) + +AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) +AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_SHARED], []) +dnl AC_DEFUN([AM_DISABLE_SHARED], []) + + + +# _LT_ENABLE_STATIC([DEFAULT]) +# ---------------------------- +# implement the --enable-static flag, and support the `static' and +# `disable-static' LT_INIT options. +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. +m4_define([_LT_ENABLE_STATIC], +[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], + [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_static=]_LT_ENABLE_STATIC_DEFAULT) + + _LT_DECL([build_old_libs], [enable_static], [0], + [Whether or not to build static libraries]) +])# _LT_ENABLE_STATIC + +LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) +]) + +AC_DEFUN([AC_DISABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], [disable-static]) +]) + +AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) +AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_STATIC], []) +dnl AC_DEFUN([AM_DISABLE_STATIC], []) + + + +# _LT_ENABLE_FAST_INSTALL([DEFAULT]) +# ---------------------------------- +# implement the --enable-fast-install flag, and support the `fast-install' +# and `disable-fast-install' LT_INIT options. +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
+m4_define([_LT_ENABLE_FAST_INSTALL], +[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([fast-install], + [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], + [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) + +_LT_DECL([fast_install], [enable_fast_install], [0], + [Whether or not to optimize for fast installation])dnl +])# _LT_ENABLE_FAST_INSTALL + +LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) + +# Old names: +AU_DEFUN([AC_ENABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the `fast-install' option into LT_INIT's first parameter.]) +]) + +AU_DEFUN([AC_DISABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the `disable-fast-install' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) +dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) + + +# _LT_WITH_PIC([MODE]) +# -------------------- +# implement the --with-pic flag, and support the `pic-only' and `no-pic' +# LT_INIT options. +# MODE is either `yes' or `no'. If omitted, it defaults to `both'. +m4_define([_LT_WITH_PIC], +[AC_ARG_WITH([pic], + [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], + [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], + [lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for lt_pkg in $withval; do + IFS="$lt_save_ifs" + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [pic_mode=default]) + +test -z "$pic_mode" && pic_mode=m4_default([$1], [default]) + +_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl +])# _LT_WITH_PIC + +LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) + +# Old name: +AU_DEFUN([AC_LIBTOOL_PICMODE], +[_LT_SET_OPTION([LT_INIT], [pic-only]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `pic-only' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) + + +m4_define([_LTDL_MODE], []) +LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], + [m4_define([_LTDL_MODE], [nonrecursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [recursive], + [m4_define([_LTDL_MODE], [recursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [subproject], + [m4_define([_LTDL_MODE], [subproject])]) + +m4_define([_LTDL_TYPE], []) +LT_OPTION_DEFINE([LTDL_INIT], [installable], + [m4_define([_LTDL_TYPE], [installable])]) +LT_OPTION_DEFINE([LTDL_INIT], [convenience], + [m4_define([_LTDL_TYPE], [convenience])]) + +# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- +# +# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 6 ltsugar.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) + + +# lt_join(SEP, ARG1, [ARG2...]) +# ----------------------------- +# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their +# associated separator. +# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier +# versions in m4sugar had bugs. +m4_define([lt_join], +[m4_if([$#], [1], [], + [$#], [2], [[$2]], + [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) +m4_define([_lt_join], +[m4_if([$#$2], [2], [], + [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) + + +# lt_car(LIST) +# lt_cdr(LIST) +# ------------ +# Manipulate m4 lists. +# These macros are necessary as long as will still need to support +# Autoconf-2.59 which quotes differently. +m4_define([lt_car], [[$1]]) +m4_define([lt_cdr], +[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], + [$#], 1, [], + [m4_dquote(m4_shift($@))])]) +m4_define([lt_unquote], $1) + + +# lt_append(MACRO-NAME, STRING, [SEPARATOR]) +# ------------------------------------------ +# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'. +# Note that neither SEPARATOR nor STRING are expanded; they are appended +# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). +# No SEPARATOR is output if MACRO-NAME was previously undefined (different +# than defined and empty). +# +# This macro is needed until we can rely on Autoconf 2.62, since earlier +# versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 
+m4_define([lt_append], +[m4_define([$1], + m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) + + + +# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) +# ---------------------------------------------------------- +# Produce a SEP delimited list of all paired combinations of elements of +# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list +# has the form PREFIXmINFIXSUFFIXn. +# Needed until we can rely on m4_combine added in Autoconf 2.62. +m4_define([lt_combine], +[m4_if(m4_eval([$# > 3]), [1], + [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl +[[m4_foreach([_Lt_prefix], [$2], + [m4_foreach([_Lt_suffix], + ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, + [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) + + +# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) +# ----------------------------------------------------------------------- +# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited +# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. +m4_define([lt_if_append_uniq], +[m4_ifdef([$1], + [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], + [lt_append([$1], [$2], [$3])$4], + [$5])], + [lt_append([$1], [$2], [$3])$4])]) + + +# lt_dict_add(DICT, KEY, VALUE) +# ----------------------------- +m4_define([lt_dict_add], +[m4_define([$1($2)], [$3])]) + + +# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) +# -------------------------------------------- +m4_define([lt_dict_add_subkey], +[m4_define([$1($2:$3)], [$4])]) + + +# lt_dict_fetch(DICT, KEY, [SUBKEY]) +# ---------------------------------- +m4_define([lt_dict_fetch], +[m4_ifval([$3], + m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), + m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) + + +# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) +# ----------------------------------------------------------------- +m4_define([lt_if_dict_fetch], +[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], + [$5], + [$6])]) + + +# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) +# -------------------------------------------------------------- +m4_define([lt_dict_filter], +[m4_if([$5], [], [], + [lt_join(m4_quote(m4_default([$4], [[, ]])), + lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), + [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl +]) + +# ltversion.m4 -- version numbers -*- Autoconf -*- +# +# Copyright (C) 2004 Free Software Foundation, Inc. +# Written by Scott James Remnant, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# @configure_input@ + +# serial 3337 ltversion.m4 +# This file is part of GNU Libtool + +m4_define([LT_PACKAGE_VERSION], [2.4.2]) +m4_define([LT_PACKAGE_REVISION], [1.3337]) + +AC_DEFUN([LTVERSION_VERSION], +[macro_version='2.4.2' +macro_revision='1.3337' +_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) +_LT_DECL(, macro_revision, 0) +]) + +# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- +# +# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. +# Written by Scott James Remnant, 2004. +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. 
+ +# serial 5 lt~obsolete.m4 + +# These exist entirely to fool aclocal when bootstrapping libtool. +# +# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) +# which have later been changed to m4_define as they aren't part of the +# exported API, or moved to Autoconf or Automake where they belong. +# +# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN +# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us +# using a macro with the same name in our local m4/libtool.m4 it'll +# pull the old libtool.m4 in (it doesn't see our shiny new m4_define +# and doesn't know about Autoconf macros at all.) +# +# So we provide this file, which has a silly filename so it's always +# included after everything else. This provides aclocal with the +# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything +# because those macros already exist, or will be overwritten later. +# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. +# +# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. +# Yes, that means every name once taken will need to remain here until +# we give up compatibility with versions before 1.7, at which point +# we need to keep only those names which we still refer to. + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) + +m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) +m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) +m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) +m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) +m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) +m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) +m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) +m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) +m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) +m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) +m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) +m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) +m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) +m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) +m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) +m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) +m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) +m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) +m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) +m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) +m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) +m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) +m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) +m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) 
+m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) +m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) +m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) +m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) +m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) +m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) +m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) +m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) +m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) +m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) +m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) +m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) +m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) +m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) +m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) +m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) +m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) +m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) +m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) +m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) +m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) +m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) +m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) +m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) +m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) +m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) +m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) + +# Copyright (C) 2002-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.14' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.14.1], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. +m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 
+AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.14.1])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# Figure out how to run the assembler. -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_AS +# ---------- +AC_DEFUN([AM_PROG_AS], +[# By default we simply use the C compiler to build assembly code. +AC_REQUIRE([AC_PROG_CC]) +test "${CCAS+set}" = set || CCAS=$CC +test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS +AC_ARG_VAR([CCAS], [assembler compiler command (defaults to CC)]) +AC_ARG_VAR([CCASFLAGS], [assembler compiler flags (defaults to CFLAGS)]) +_AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl +]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will broke when you +# start a VPATH build or use an absolute $srcdir. +# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[dnl Rely on autoconf to set up CDPATH properly. +AC_PREREQ([2.50])dnl +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2013 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. +Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all it's +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
+ mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. 
+# This macro is AC_REQUIREd in _AM_DEPENDENCIES. +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + case $CONFIG_FILES in + *\'*) eval set x "$CONFIG_FILES" ;; + *) set x $CONFIG_FILES ;; + esac + shift + for mf + do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named 'Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then + dirpart=`AS_DIRNAME("$mf")` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running 'make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "$am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`AS_DIRNAME(["$file"])` + AS_MKDIR_P([$dirpart/$fdir]) + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done + done +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking +# is enabled. FIXME. This creates each '.P' file that we will +# need in order to bootstrap the dependency handling code. 
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) +]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. 
+AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> +# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. +AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542> + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. 
+ +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: <http://www.gnu.org/software/coreutils/>. + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. +_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check to see how make treats includes. +AC_DEFUN([AM_MAKE_INCLUDE], +[am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo this is the am__doit target +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +AC_MSG_CHECKING([for style of include used by $am_make]) +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# Ignore all kinds of additional output from 'make'. 
+case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU + ;; +esac +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac +fi +AC_SUBST([am__include]) +AC_SUBST([am__quote]) +AC_MSG_RESULT([$_am_result]) +rm -f confinc confmf +]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. +# If it is, set am_missing_run to use it, otherwise, to nothing. +AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_CC_C_O +# --------------- +# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC +# to automatically call this. +AC_DEFUN([_AM_PROG_CC_C_O], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +AC_LANG_PUSH([C])dnl +AC_CACHE_CHECK( + [whether $CC understands -c and -o together], + [am_cv_prog_cc_c_o], + [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) + # Make sure it works both with $CC and with simple cc. 
+ # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i]) +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +AC_LANG_POP([C])]) + +# For backward compatibility. +AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. 
+ sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! +Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). +AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. +am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". 
However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +# +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AC_SUBST([AMTAR], ['$${TAR-tar}']) + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' + +m4_if([$1], [v7], + [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], + + [m4_case([$1], + [ustar], + [# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) + if test $am_uid -le $am_max_uid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi + AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) + if test $am_gid -le $am_max_gid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi], + + [pax], + [], + + [m4_fatal([Unknown tar format])]) + + AC_MSG_CHECKING([how to create a $1 tar archive]) + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. 
+ _am_tools=${am_cv_prog_tar_$1-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. + (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar <conftest.tar]) + AM_RUN_LOG([cat conftest.dir/file]) + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) + AC_MSG_RESULT([$am_cv_prog_tar_$1])]) + +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + diff --git a/vnet/compile b/vnet/compile new file mode 100755 index 00000000000..531136b068e --- /dev/null +++ b/vnet/compile @@ -0,0 +1,347 @@ +#! /bin/sh +# Wrapper for compilers which do not understand '-c -o'. + +scriptversion=2012-10-14.11; # UTC + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# Written by Tom Tromey <tromey@cygnus.com>. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to <bug-automake@gnu.org> or send patches to +# <automake-patches@gnu.org>. + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. 
If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f "$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. +Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. + +Report bugs to <bug-automake@gnu.org>. +EOF + exit $? 
+ ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe ) + func_cl_wrapper "$@" # Doesn't return... + ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? + +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/vnet/config.guess b/vnet/config.guess new file mode 100755 index 00000000000..b79252d6b10 --- /dev/null +++ b/vnet/config.guess @@ -0,0 +1,1558 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2013 Free Software Foundation, Inc. + +timestamp='2013-06-10' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner. 
+# +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# +# Please send patches with a ChangeLog entry to config-patches@gnu.org. + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2013 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case "${UNAME_SYSTEM}" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. 
+ LIBC=gnu + + eval $set_cc_for_build + cat <<-EOF > $dummy.c + #include <features.h> + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH="i386" + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH="x86_64" + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include <stdio.h> /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | 
sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. + echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <sys/systemcfg.h> + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? 
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include <stdlib.h> + #include <unistd.h> + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <unistd.h> + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. 
*/ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case ${UNAME_PROCESSOR} in + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; 
+ *:MINGW64*:*) + echo ${UNAME_MACHINE}-pc-mingw64 + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:MSYS*:*) + echo ${UNAME_MACHINE}-pc-msys + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? + echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + aarch64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" 
= 0 ; then LIBC="gnulibc1" ; fi + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + else + echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + cris:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + crisv32:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + frv:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + hexagon:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:Linux:*:*) + echo ${UNAME_MACHINE}-pc-linux-${LIBC} + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + ;; + or1k:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + or32:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-${LIBC} + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; + PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; + *) echo hppa-unknown-linux-${LIBC} ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-${LIBC} + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-${LIBC} + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-${LIBC} + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-${LIBC} + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux-${LIBC} + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-${LIBC} + exit ;; + x86_64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. 
+ echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name` + echo ${UNAME_MACHINE}-pc-isc$UNAME_REL + elif /bin/uname -X 2>/dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configury will decide that + # this is a cross-build. + echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. 
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says <Richard.M.Bartel@ccMail.Census.GOV> + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes <hewes@openmarket.com>. + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ echo i586-pc-haiku + exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + eval $set_cc_for_build + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + fi + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; + NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; + i*86:AROS:*:*) + echo ${UNAME_MACHINE}-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo ${UNAME_MACHINE}-unknown-esx + exit ;; +esac + +eval $set_cc_for_build +cat >$dummy.c <<EOF +#ifdef _SEQUENT_ +# include <sys/types.h> +# include <sys/utsname.h> +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include <sys/param.h> + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include <sys/param.h> +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi + +cat >&2 <<EOF +$0: unable to guess system type + +This script, last modified $timestamp, has failed to recognize +the operating system you are using. It is advised that you +download the most up to date version of the config scripts from + + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +and + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + +If the version you run ($0) is already up to date, please +send the following data and any information you think might be +pertinent to <config-patches@gnu.org> in order to provide the needed +information to handle your system. + +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/vnet/config.sub b/vnet/config.sub new file mode 100755 index 00000000000..9633db70467 --- /dev/null +++ b/vnet/config.sub @@ -0,0 +1,1791 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2013 Free Software Foundation, Inc. + +timestamp='2013-08-10' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2013 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. 
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + android-linux) + os=-linux-android + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze*) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*178) + os=-lynxos178 + ;; + -lynx*5) + os=-lynxos5 + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. 
+ 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arceb \ + | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ + | avr | avr32 \ + | be32 | be64 \ + | bfin \ + | c4x | c8051 | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | epiphany \ + | fido | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 | nios2eb | nios2el \ + | ns16k | ns32k \ + | open8 \ + | or1k | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pyramid \ + | rl78 | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | we32k \ + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + xgate) + basic_machine=$basic_machine-unknown + os=-none + ;; + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. 
+ 580-* \ + | a29k-* \ + | aarch64-* | aarch64_be-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | c8051-* | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | hexagon-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ + | microblaze-* | microblazeel-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipsr5900-* | mipsr5900el-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* | nios2eb-* | nios2el-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pyramid-* \ + | rl78-* | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | tahoe-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile*-* \ + | tron-* \ + | ubicom32-* \ + | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ + | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 
+ ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + microblaze*) + basic_machine=microblaze-xilinx + ;; + mingw64) + basic_machine=x86_64-pc + os=-mingw64 + ;; + mingw32) + basic_machine=i686-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + msys) + basic_machine=i686-pc + os=-msys + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + nacl) + basic_machine=le32-unknown + os=-nacl + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + 
basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc | ppcbe) basic_machine=powerpc-unknown + ;; + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + os=-rdos + ;; + rdos32) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + 
basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tile*) + basic_machine=$basic_machine-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. 
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* | -plan9* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -bitrig* | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* | -cegcc* \ + | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-musl* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) + # Remember, each alternative MUST END IN *, to match a version number. + ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -nacl*) + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. 
+# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + c8051-*) + os=-elf + ;; + hexagon-*) + os=-elf + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or1k-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. + os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/vnet/configure b/vnet/configure new file mode 100755 index 00000000000..633914bbfc0 --- /dev/null +++ b/vnet/configure @@ -0,0 +1,13807 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for vnet 1.1. +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. +fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 + + test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ + || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. 
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + +SHELL=${CONFIG_SHELL-/bin/sh} + + +test -n "$DJDIR" || exec 7<&0 </dev/null +exec 6>&1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='vnet' +PACKAGE_TARNAME='vnet' +PACKAGE_VERSION='1.1' +PACKAGE_STRING='vnet 1.1' +PACKAGE_BUGREPORT='' +PACKAGE_URL='' + +# Factoring default headers for most tests. +ac_includes_default="\ +#include <stdio.h> +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif +#ifdef STDC_HEADERS +# include <stdlib.h> +# include <stddef.h> +#else +# ifdef HAVE_STDLIB_H +# include <stdlib.h> +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include <memory.h> +# endif +# include <string.h> +#endif +#ifdef HAVE_STRINGS_H +# include <strings.h> +#endif +#ifdef HAVE_INTTYPES_H +# include <inttypes.h> +#endif +#ifdef HAVE_STDINT_H +# include <stdint.h> +#endif +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif" + +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +LIBOBJS +VIRL +WITH_VIRL_FALSE +WITH_VIRL_TRUE +DPDK +WITH_DPDK_FALSE +WITH_DPDK_TRUE +am__fastdepCCAS_FALSE +am__fastdepCCAS_TRUE +CCASDEPMODE +CCASFLAGS +CCAS +CPP +OTOOL64 +OTOOL +LIPO +NMEDIT +DSYMUTIL +MANIFEST_TOOL +RANLIB +ac_ct_AR +AR +DLLTOOL +OBJDUMP +LN_S +NM +ac_ct_DUMPBIN +DUMPBIN +LD +FGREP +EGREP +GREP +SED +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +LIBTOOL +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__quote +am__include +DEPDIR +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_silent_rules +enable_dependency_tracking +enable_shared +enable_static +with_pic 
+enable_fast_install +with_gnu_ld +with_sysroot +enable_libtool_lock +with_dpdk +with_virl +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP +CCAS +CCASFLAGS' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | 
--localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | 
--sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. 
+ $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. +for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. 
+case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures vnet 1.1 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/vnet] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of vnet 1.1:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-silent-rules less verbose build output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make V=0") + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --enable-shared[=PKGS] build shared libraries [default=yes] + --enable-static[=PKGS] build static libraries [default=yes] + --enable-fast-install[=PKGS] + optimize for fast installation [default=yes] + --disable-libtool-lock avoid locking (might break parallel builds) + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use + both] + --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --with-sysroot=DIR Search for dependent libraries within DIR + (or the compiler's sysroot if not specified). + --with-dpdk Use the Intel dpdk + --with-virl Compile for virl citizenship + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a + nonstandard directory <lib dir> + LIBS libraries to pass to the linker, e.g. -l<library> + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if + you have headers in a nonstandard directory <include dir> + CPP C preprocessor + CCAS assembler compiler command (defaults to CC) + CCASFLAGS assembler compiler flags (defaults to CFLAGS) + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. 
+ +Report bugs to the package provider. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +vnet configure 1.1 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! 
-s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case <limits.h> declares $2. + For example, HP-UX 11i <limits.h> declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + <limits.h> exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by vnet $as_me 1.1, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. 
+ else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. ## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. ## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. 
+ +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. + case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. 
+ ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +am__api_version='1.14' + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. 
But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +$as_echo_n "checking whether build environment is sane... " >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. 
+am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. +ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` + +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. 
+if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 +$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } +if test -z "$MKDIR_P"; then + if ${ac_cv_path_mkdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +$as_echo "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. +case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +# Check whether --enable-silent-rules was given. 
+if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... " >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='vnet' + VERSION='1.1' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> +# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! 
+ +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542> + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: <http://www.gnu.org/software/coreutils/>. + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 + fi +fi +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... " >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. 
We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... " >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <stdio.h> +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... 
" >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... " >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdarg.h> +#include <stdio.h> +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... " >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. 
+ # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + + +am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo this is the am__doit target +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 +$as_echo_n "checking for style of include used by $am_make... " >&6; } +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# Ignore all kinds of additional output from 'make'. +case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU + ;; +esac +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 +$as_echo "$_am_result" >&6; } +rm -f confinc confmf + +# Check whether --enable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
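+  # (Illustrative sketch: for the common "gcc3" depmode, the depcomp wrapper
+  # effectively runs something like
+  #     $CC -MT sub/conftest.o -MD -MP -MF sub/conftest.TPo -c -o sub/conftest.o sub/conftest.c
+  # and then renames conftest.TPo to conftest.Po; the loop below simply probes
+  # which of these styles the compiler at hand supports.)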
+ mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + +case `pwd` in + *\ * | *\ *) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 +$as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; +esac + + + +macro_version='2.4.2' +macro_revision='1.3337' + + + + + + + + + + + + + +ltmain="$ac_aux_dir/ltmain.sh" + +# Make sure we can run config.sub. +$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +$as_echo_n "checking build system type... " >&6; } +if ${ac_cv_build+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +$as_echo "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +$as_echo_n "checking host system type... " >&6; } +if ${ac_cv_host+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +$as_echo "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\(["`$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. 
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' + +ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 +$as_echo_n "checking how to print strings... " >&6; } +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "" +} + +case "$ECHO" in + printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5 +$as_echo "printf" >&6; } ;; + print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 +$as_echo "print -r" >&6; } ;; + *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5 +$as_echo "cat" >&6; } ;; +esac + + + + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +$as_echo_n "checking for a sed that does not truncate output... " >&6; } +if ${ac_cv_path_SED+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_SED" || continue +# Check for GNU ac_path_SED and select it if it is found. + # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? 
"no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +$as_echo "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. 
+ # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 +$as_echo_n "checking for fgrep... " >&6; } +if ${ac_cv_path_FGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 + then ac_cv_path_FGREP="$GREP -F" + else + if test -z "$FGREP"; then + ac_path_FGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in fgrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_FGREP" || continue +# Check for GNU ac_path_FGREP and select it if it is found. + # Check for GNU $ac_path_FGREP +case `"$ac_path_FGREP" --version 2>&1` in +*GNU*) + ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'FGREP' >> "conftest.nl" + "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_FGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_FGREP="$ac_path_FGREP" + ac_path_FGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_FGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_FGREP"; then + as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_FGREP=$FGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 +$as_echo "$ac_cv_path_FGREP" >&6; } + FGREP="$ac_cv_path_FGREP" + + +test -z "$GREP" && GREP=grep + + + + + + + + + + + + + + + + + + + +# Check whether --with-gnu-ld was given. 
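+# (Illustrative: a request such as "./configure --with-gnu-ld" sets
+# with_gnu_ld=yes here, and the ld search below then keeps scanning $PATH
+# until it finds a GNU ld instead of stopping at the first ld it encounters.)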
+if test "${with_gnu_ld+set}" = set; then : + withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes +else + with_gnu_ld=no +fi + +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +$as_echo_n "checking for ld used by $CC... " >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +$as_echo_n "checking for GNU ld... " >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +$as_echo_n "checking for non-GNU ld... " >&6; } +fi +if ${lt_cv_path_LD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$LD"; then + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in + *GNU* | *'with BFD'*) + test "$with_gnu_ld" != no && break + ;; + *) + test "$with_gnu_ld" != yes && break + ;; + esac + fi + done + IFS="$lt_save_ifs" +else + lt_cv_path_LD="$LD" # Let the user override the test with a path. +fi +fi + +LD="$lt_cv_path_LD" +if test -n "$LD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5 +$as_echo "$LD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; } +if ${lt_cv_prog_gnu_ld+:} false; then : + $as_echo_n "(cached) " >&6 +else + # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 </dev/null` in +*GNU* | *'with BFD'*) + lt_cv_prog_gnu_ld=yes + ;; +*) + lt_cv_prog_gnu_ld=no + ;; +esac +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5 +$as_echo "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 +$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; } +if ${lt_cv_path_NM+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$NM"; then + # Let the user override the test. 
+ lt_cv_path_NM="$NM" +else + lt_nm_to_check="${ac_tool_prefix}nm" + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + tmp_nm="$ac_dir/$lt_tmp_nm" + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then + # Check to see if the nm accepts a BSD-compat flag. + # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS="$lt_save_ifs" + done + : ${lt_cv_path_NM=no} +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 +$as_echo "$lt_cv_path_NM" >&6; } +if test "$lt_cv_path_NM" != "no"; then + NM="$lt_cv_path_NM" +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. + else + if test -n "$ac_tool_prefix"; then + for ac_prog in dumpbin "link -dump" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DUMPBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DUMPBIN"; then + ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DUMPBIN=$ac_cv_prog_DUMPBIN +if test -n "$DUMPBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 +$as_echo "$DUMPBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$DUMPBIN" && break + done +fi +if test -z "$DUMPBIN"; then + ac_ct_DUMPBIN=$DUMPBIN + for ac_prog in dumpbin "link -dump" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DUMPBIN"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN +if test -n "$ac_ct_DUMPBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 +$as_echo "$ac_ct_DUMPBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_DUMPBIN" && break +done + + if test "x$ac_ct_DUMPBIN" = x; then + DUMPBIN=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DUMPBIN=$ac_ct_DUMPBIN + fi +fi + + case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols" + ;; + *) + DUMPBIN=: + ;; + esac + fi + + if test "$DUMPBIN" != ":"; then + NM="$DUMPBIN" + fi +fi +test -z "$NM" && NM=nm + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 +$as_echo_n "checking the name lister ($NM) interface... " >&6; } +if ${lt_cv_nm_interface+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: output\"" >&5) + cat conftest.out >&5 + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 +$as_echo "$lt_cv_nm_interface" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +$as_echo_n "checking whether ln -s works... " >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +$as_echo "no, using $LN_S" >&6; } +fi + +# find the maximum length of command line arguments +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 +$as_echo_n "checking the maximum length of command line arguments... " >&6; } +if ${lt_cv_sys_max_cmd_len+:} false; then : + $as_echo_n "(cached) " >&6 +else + i=0 + teststring="ABCD" + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. 
+ # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. + lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8 ; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test $i != 17 # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
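+      # (Illustrative arithmetic: if the doubling loop above runs to its cap,
+      # the teststring is about 4 * 2^17 = 524288 characters, so after the
+      # halving below the cached limit comes out near 262144.)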
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac + +fi + +if test -n $lt_cv_sys_max_cmd_len ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 +$as_echo "$lt_cv_sys_max_cmd_len" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5 +$as_echo "none" >&6; } +fi +max_cmd_len=$lt_cv_sys_max_cmd_len + + + + + + +: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands some XSI constructs" >&5 +$as_echo_n "checking whether the shell understands some XSI constructs... " >&6; } +# Try some XSI features +xsi_shell=no +( _lt_dummy="a/b/c" + test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ + = c,a/b,b/c, \ + && eval 'test $(( 1 + 1 )) -eq 2 \ + && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ + && xsi_shell=yes +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5 +$as_echo "$xsi_shell" >&6; } + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands \"+=\"" >&5 +$as_echo_n "checking whether the shell understands \"+=\"... " >&6; } +lt_shell_append=no +( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \ + >/dev/null 2>&1 \ + && lt_shell_append=yes +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5 +$as_echo "$lt_shell_append" >&6; } + + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi + + + + + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 +$as_echo_n "checking how to convert $build file names to $host format... " >&6; } +if ${lt_cv_to_host_file_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac + +fi + +to_host_file_cmd=$lt_cv_to_host_file_cmd +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 +$as_echo "$lt_cv_to_host_file_cmd" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 +$as_echo_n "checking how to convert $build file names to toolchain format... " >&6; } +if ${lt_cv_to_tool_file_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + #assume ordinary cross tools, or native build. 
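+  # (Illustrative: func_convert_file_noop passes file names through unchanged;
+  # only when targeting a mingw host from an MSYS build environment does the
+  # case below switch to func_convert_file_msys_to_w32, which rewrites an
+  # MSYS-style path such as /c/work/obj.o into a native form like c:/work/obj.o.)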
+lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac + +fi + +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 +$as_echo "$lt_cv_to_tool_file_cmd" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 +$as_echo_n "checking for $LD option to reload object files... " >&6; } +if ${lt_cv_ld_reload_flag+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_reload_flag='-r' +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 +$as_echo "$lt_cv_ld_reload_flag" >&6; } +reload_flag=$lt_cv_ld_reload_flag +case $reload_flag in +"" | " "*) ;; +*) reload_flag=" $reload_flag" ;; +esac +reload_cmds='$LD$reload_flag -o $output$reload_objs' +case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + if test "$GCC" != yes; then + reload_cmds=false + fi + ;; + darwin*) + if test "$GCC" = yes; then + reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs' + else + reload_cmds='$LD$reload_flag -o $output$reload_objs' + fi + ;; +esac + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +$as_echo "$OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +$as_echo "$ac_ct_OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 +$as_echo_n "checking how to recognize dependent libraries... " >&6; } +if ${lt_cv_deplibs_check_method+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# `unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. +# 'file_magic [[regex]]' -- check by looking for files in library path +# which responds to the $file_magic_cmd with a given extended regex. +# If you have `file' or equivalent on your system and you're not sure +# whether `pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[4-9]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[45]*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin. + if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' 
+ lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. + lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[3-9]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" + 
lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +esac + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 +$as_echo "$lt_cv_deplibs_check_method" >&6; } + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + + + + + + + + + + + + + + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +$as_echo "$DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +$as_echo "$ac_ct_DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 +$as_echo_n "checking how to associate runtime and link libraries... " >&6; } +if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh + # decide which to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd="$ECHO" + ;; +esac + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 +$as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; } +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + + + + + + + + +if test -n "$ac_tool_prefix"; then + for ac_prog in ar + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +$as_echo "$AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +$as_echo "$ac_ct_AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} +: ${AR_FLAGS=cru} + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 +$as_echo_n "checking for archiver @FILE support... " >&6; } +if ${lt_cv_ar_at_file+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ar_at_file=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + # Ensure the archiver fails upon bogus file names. + rm -f conftest.$ac_objext libconftest.a + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -ne 0; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 +$as_echo "$lt_cv_ar_at_file" >&6; } + +if test "x$lt_cv_ar_at_file" = xno; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +test -z "$STRIP" && STRIP=: + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +test -z "$RANLIB" && RANLIB=: + + + + + + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + +# Check for command to grab the raw symbol name followed by C symbol from nm. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 +$as_echo_n "checking command to parse $NM output from $compiler object... " >&6; } +if ${lt_cv_sys_global_symbol_pipe+:} false; then : + $as_echo_n "(cached) " >&6 +else + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[ABCDGISTW]' + ;; +hpux*) + if test "$host_cpu" = ia64; then + symcode='[ABCDEGRST]' + fi + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris*) + symcode='[BDRT]' + ;; +sco3.2v5*) + symcode='[DT]' + ;; +sysv4.2uw2*) + symcode='[DT]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[ABDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. 
+case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[ABCDGIRSTW]' ;; +esac + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (void *) \&\2},/p'" +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ ]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"lib\2\", (void *) \&\2},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function + # and D for any global variable. + # Also find C++ and __fastcall symbols from MSVC++, + # which start with @ or ?. + lt_cv_sys_global_symbol_pipe="$AWK '"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\ +" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ +" s[1]~/^[@?]/{print s[1], s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx" + else + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + # Now try to grab the symbols. + nlist=conftest.nm + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 + (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. 
+ if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext}; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + +fi + +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5 +$as_echo "failed" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 +$as_echo "ok" >&6; } +fi + +# Response file support. +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 +$as_echo_n "checking for sysroot... " >&6; } + +# Check whether --with-sysroot was given. 
+if test "${with_sysroot+set}" = set; then : + withval=$with_sysroot; +else + with_sysroot=no +fi + + +lt_sysroot= +case ${with_sysroot} in #( + yes) + if test "$GCC" = yes; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5 +$as_echo "${with_sysroot}" >&6; } + as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5 + ;; +esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 +$as_echo "${lt_sysroot:-no}" >&6; } + + + + + +# Check whether --enable-libtool-lock was given. +if test "${enable_libtool_lock+set}" = set; then : + enableval=$enable_libtool_lock; +fi + +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE="32" + ;; + *ELF-64*) + HPUX_IA64_MODE="64" + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out which ABI we are using. + echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + if test "$lt_cv_prog_gnu_ld" = yes; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + case `/usr/bin/file conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 +$as_echo_n "checking whether the C compiler needs -belf... " >&6; } +if ${lt_cv_cc_needs_belf+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_cc_needs_belf=yes +else + lt_cv_cc_needs_belf=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 +$as_echo "$lt_cv_cc_needs_belf" >&6; } + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; +*-*solaris*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. + if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD="${LD-ld}_sol2" + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks="$enable_libtool_lock" + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. +set dummy ${ac_tool_prefix}mt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_MANIFEST_TOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MANIFEST_TOOL"; then + ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL +if test -n "$MANIFEST_TOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 +$as_echo "$MANIFEST_TOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MANIFEST_TOOL"; then + ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL + # Extract the first word of "mt", so it can be a program name with args. +set dummy mt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_MANIFEST_TOOL"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL +if test -n "$ac_ct_MANIFEST_TOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 +$as_echo "$ac_ct_MANIFEST_TOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_MANIFEST_TOOL" = x; then + MANIFEST_TOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL + fi +else + MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" +fi + +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 +$as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } +if ${lt_cv_path_mainfest_tool+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 + $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out + cat conftest.err >&5 + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 +$as_echo "$lt_cv_path_mainfest_tool" >&6; } +if test "x$lt_cv_path_mainfest_tool" != xyes; then + MANIFEST_TOOL=: +fi + + + + + + + case $host_os in + rhapsody* | darwin*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DSYMUTIL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DSYMUTIL"; then + ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DSYMUTIL=$ac_cv_prog_DSYMUTIL +if test -n "$DSYMUTIL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 +$as_echo "$DSYMUTIL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DSYMUTIL"; then + ac_ct_DSYMUTIL=$DSYMUTIL + # Extract the first word of "dsymutil", so it can be a program name with args. +set dummy dsymutil; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DSYMUTIL"; then + ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL +if test -n "$ac_ct_DSYMUTIL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 +$as_echo "$ac_ct_DSYMUTIL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DSYMUTIL" = x; then + DSYMUTIL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DSYMUTIL=$ac_ct_DSYMUTIL + fi +else + DSYMUTIL="$ac_cv_prog_DSYMUTIL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. +set dummy ${ac_tool_prefix}nmedit; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_NMEDIT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$NMEDIT"; then + ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +NMEDIT=$ac_cv_prog_NMEDIT +if test -n "$NMEDIT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 +$as_echo "$NMEDIT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_NMEDIT"; then + ac_ct_NMEDIT=$NMEDIT + # Extract the first word of "nmedit", so it can be a program name with args. +set dummy nmedit; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_NMEDIT"; then + ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_NMEDIT="nmedit" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT +if test -n "$ac_ct_NMEDIT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 +$as_echo "$ac_ct_NMEDIT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_NMEDIT" = x; then + NMEDIT=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + NMEDIT=$ac_ct_NMEDIT + fi +else + NMEDIT="$ac_cv_prog_NMEDIT" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. +set dummy ${ac_tool_prefix}lipo; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_LIPO+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$LIPO"; then + ac_cv_prog_LIPO="$LIPO" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_LIPO="${ac_tool_prefix}lipo" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +LIPO=$ac_cv_prog_LIPO +if test -n "$LIPO"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 +$as_echo "$LIPO" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_LIPO"; then + ac_ct_LIPO=$LIPO + # Extract the first word of "lipo", so it can be a program name with args. +set dummy lipo; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_LIPO+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_LIPO"; then + ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_LIPO="lipo" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO +if test -n "$ac_ct_LIPO"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 +$as_echo "$ac_ct_LIPO" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_LIPO" = x; then + LIPO=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LIPO=$ac_ct_LIPO + fi +else + LIPO="$ac_cv_prog_LIPO" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ac_tool_prefix}otool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +$as_echo "$OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL"; then + ac_ct_OTOOL=$OTOOL + # Extract the first word of "otool", so it can be a program name with args. +set dummy otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OTOOL"; then + ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL="otool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL +if test -n "$ac_ct_OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 +$as_echo "$ac_ct_OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OTOOL" = x; then + OTOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL=$ac_ct_OTOOL + fi +else + OTOOL="$ac_cv_prog_OTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool64; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL64+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL64"; then + ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL64=$ac_cv_prog_OTOOL64 +if test -n "$OTOOL64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 +$as_echo "$OTOOL64" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL64"; then + ac_ct_OTOOL64=$OTOOL64 + # Extract the first word of "otool64", so it can be a program name with args. +set dummy otool64; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OTOOL64"; then + ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL64="otool64" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 +if test -n "$ac_ct_OTOOL64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 +$as_echo "$ac_ct_OTOOL64" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OTOOL64" = x; then + OTOOL64=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL64=$ac_ct_OTOOL64 + fi +else + OTOOL64="$ac_cv_prog_OTOOL64" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 +$as_echo_n "checking for -single_module linker flag... " >&6; } +if ${lt_cv_apple_cc_single_mod+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_apple_cc_single_mod=no + if test -z "${LT_MULTI_MODULE}"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. + rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&5 + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test $_lt_result -eq 0; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&5 + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 +$as_echo "$lt_cv_apple_cc_single_mod" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 +$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; } +if ${lt_cv_ld_exported_symbols_list+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_ld_exported_symbols_list=yes +else + lt_cv_ld_exported_symbols_list=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 +$as_echo "$lt_cv_ld_exported_symbols_list" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 +$as_echo_n "checking for -force_load linker flag... 
" >&6; } +if ${lt_cv_ld_force_load+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 + echo "$AR cru libconftest.a conftest.o" >&5 + $AR cru libconftest.a conftest.o 2>&5 + echo "$RANLIB libconftest.a" >&5 + $RANLIB libconftest.a 2>&5 + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&5 + elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&5 + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 +$as_echo "$lt_cv_ld_force_load" >&6; } + case $host_os in + rhapsody* | darwin1.[012]) + _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + darwin*) # darwin 5.x on + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? + case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[91]*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + 10.[012]*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + 10.*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test "$lt_cv_apple_cc_single_mod" = "yes"; then + _lt_dar_single_mod='$single_module' + fi + if test "$lt_cv_ld_exported_symbols_list" = "yes"; then + _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' + fi + if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + # <limits.h> exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. 
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <ac_nonexistent.h> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + # <limits.h> exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <ac_nonexistent.h> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <float.h> + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <string.h> + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdlib.h> + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <ctype.h> +#include <stdlib.h> +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in dlfcn.h +do : + ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default +" +if test "x$ac_cv_header_dlfcn_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_DLFCN_H 1 +_ACEOF + +fi + +done + + + + + +# Set options + + + + enable_dlopen=no + + + enable_win32_dll=no + + + # Check whether --enable-shared was given. +if test "${enable_shared+set}" = set; then : + enableval=$enable_shared; p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_shared=yes +fi + + + + + + + + + + # Check whether --enable-static was given. +if test "${enable_static+set}" = set; then : + enableval=$enable_static; p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_static=yes +fi + + + + + + + + + + +# Check whether --with-pic was given. +if test "${with_pic+set}" = set; then : + withval=$with_pic; lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for lt_pkg in $withval; do + IFS="$lt_save_ifs" + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + pic_mode=default +fi + + +test -z "$pic_mode" && pic_mode=default + + + + + + + + # Check whether --enable-fast-install was given. +if test "${enable_fast_install+set}" = set; then : + enableval=$enable_fast_install; p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_fast_install=yes +fi + + + + + + + + + + + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ltmain" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +test -z "$LN_S" && LN_S="ln -s" + + + + + + + + + + + + + + +if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 +$as_echo_n "checking for objdir... " >&6; } +if ${lt_cv_objdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 +$as_echo "$lt_cv_objdir" >&6; } +objdir=$lt_cv_objdir + + + + + +cat >>confdefs.h <<_ACEOF +#define LT_OBJDIR "$lt_cv_objdir/" +_ACEOF + + + + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except MSVC, +# which needs '.lib'). 
+libext=a + +with_gnu_ld="$lt_cv_prog_gnu_ld" + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +for cc_temp in $compiler""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` + + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 +$as_echo_n "checking for ${ac_tool_prefix}file... " >&6; } +if ${lt_cv_path_MAGIC_CMD+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/${ac_tool_prefix}file; then + lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac +fi + +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +$as_echo "$MAGIC_CMD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + + + +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5 +$as_echo_n "checking for file... " >&6; } +if ${lt_cv_path_MAGIC_CMD+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/file; then + lt_cv_path_MAGIC_CMD="$ac_dir/file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac +fi + +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +$as_echo "$MAGIC_CMD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + else + MAGIC_CMD=: + fi +fi + + fi + ;; +esac + +# Use C for the default configuration in the libtool script + +lt_save_CC="$CC" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. +objext=o +objext=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. +compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + +if test -n "$compiler"; then + +lt_prog_compiler_no_builtin_flag= + +if test "$GCC" = yes; then + case $cc_basename in + nvcc*) + lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; + *) + lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; + esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 +$as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... 
" >&6; } +if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_rtti_exceptions=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="-fno-rtti -fno-exceptions" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_rtti_exceptions=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 +$as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; } + +if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then + lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" +else + : +fi + +fi + + + + + + + lt_prog_compiler_wl= +lt_prog_compiler_pic= +lt_prog_compiler_static= + + + if test "$GCC" = yes; then + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_static='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic='-DDLL_EXPORT' + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. 
+ case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl='-Xlinker ' + if test -n "$lt_prog_compiler_pic"; then + lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + lt_prog_compiler_wl='-Wl,' + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + else + lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic='-DDLL_EXPORT' + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static='${wl}-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl='-Wl,' + # PIC (with -KPIC) is the default. + lt_prog_compiler_static='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64 which still supported -KPIC. + ecc*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='--shared' + lt_prog_compiler_static='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl='-Wl,' + # All Alpha code is PIC. 
+ lt_prog_compiler_static='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-qpic' + lt_prog_compiler_static='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl='-Wl,' + # All OSF/1 code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl='-Qoption ld ';; + *) + lt_prog_compiler_wl='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl='-Qoption ld ' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + lt_prog_compiler_pic='-Kconform_pic' + lt_prog_compiler_static='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_can_build_shared=no + ;; + + uts4*) + lt_prog_compiler_pic='-pic' + lt_prog_compiler_static='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared=no + ;; + esac + fi + +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic= + ;; + *) + lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +$as_echo_n "checking for $compiler option to produce PIC... " >&6; } +if ${lt_cv_prog_compiler_pic+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic=$lt_prog_compiler_pic +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 +$as_echo "$lt_cv_prog_compiler_pic" >&6; } +lt_prog_compiler_pic=$lt_cv_prog_compiler_pic + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 +$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... 
" >&6; } +if ${lt_cv_prog_compiler_pic_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_works=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic -DPIC" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 +$as_echo "$lt_cv_prog_compiler_pic_works" >&6; } + +if test x"$lt_cv_prog_compiler_pic_works" = xyes; then + case $lt_prog_compiler_pic in + "" | " "*) ;; + *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; + esac +else + lt_prog_compiler_pic= + lt_prog_compiler_can_build_shared=no +fi + +fi + + + + + + + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if ${lt_cv_prog_compiler_static_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_static_works=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works=yes + fi + else + lt_cv_prog_compiler_static_works=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 +$as_echo "$lt_cv_prog_compiler_static_works" >&6; } + +if test x"$lt_cv_prog_compiler_static_works" = xyes; then + : +else + lt_prog_compiler_static= +fi + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... 
" >&6; } +if ${lt_cv_prog_compiler_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +$as_echo "$lt_cv_prog_compiler_c_o" >&6; } + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. 
+ $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +$as_echo "$lt_cv_prog_compiler_c_o" >&6; } + + + + +hard_links="nottested" +if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +$as_echo_n "checking if we can lock with hard links... " >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +$as_echo "$hard_links" >&6; } + if test "$hard_links" = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 +$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + runpath_var= + allow_undefined_flag= + always_export_symbols=no + archive_cmds= + archive_expsym_cmds= + compiler_needs_object=no + enable_shared_with_static_runtimes=no + export_dynamic_flag_spec= + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic=no + hardcode_direct=no + hardcode_direct_absolute=no + hardcode_libdir_flag_spec= + hardcode_libdir_separator= + hardcode_minus_L=no + hardcode_shlibpath_var=unsupported + inherit_rpath=no + link_all_deplibs=unknown + module_cmds= + module_expsym_cmds= + old_archive_from_new_cmds= + old_archive_from_expsyms_cmds= + thread_safe_flag_spec= + whole_archive_flag_spec= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ` (' and `)$', so one must not match beginning or + # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', + # as well as any symbol that contains `d'. + exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + linux* | k*bsd*-gnu | gnu*) + link_all_deplibs=no + ;; + esac + + ld_shlibs=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. 
+ lt_use_gnu_ld_interface=no + if test "$with_gnu_ld" = yes; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test "$lt_use_gnu_ld_interface" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + export_dynamic_flag_spec='${wl}--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + whole_archive_flag_spec= + fi + supports_anon_versioning=no + case `$LD -v 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test "$host_cpu" != ia64; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. + +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach <jrb3@best.com> says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec='-L$libdir' + export_dynamic_flag_spec='${wl}--export-all-symbols' + allow_undefined_flag=unsupported + always_export_symbols=no + enable_shared_with_static_runtimes=yes + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... + archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs=no + fi + ;; + + haiku*) + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + link_all_deplibs=yes + ;; + + interix[3-9]*) + hardcode_direct=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test "$host_os" = linux-dietlibc; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test "$tmp_diet" = no + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec= + tmp_sharedflag='--shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o 
$lib' + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs=no + fi + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. 
+ if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test "$ld_shlibs" = no; then + runpath_var= + hardcode_libdir_flag_spec= + export_dynamic_flag_spec= + whole_archive_flag_spec= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. + hardcode_minus_L=yes + if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix[4-9]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global + # defined symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
+ + archive_cmds='' + hardcode_direct=yes + hardcode_direct_absolute=yes + hardcode_libdir_separator=':' + link_all_deplibs=yes + file_list_spec='${wl}-f,' + + if test "$GCC" = yes; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + link_all_deplibs=no + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + export_dynamic_flag_spec='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + allow_undefined_flag='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath_+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. 
+ if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath_+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag=' ${wl}-bernotok' + allow_undefined_flag=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec='$convenience' + fi + archive_cmds_need_lc=yes + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + bsdi[45]*) + export_dynamic_flag_spec=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl*) + # Native MSVC + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + always_export_symbols=yes + file_list_spec='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, )='true' + enable_shared_with_static_runtimes=yes + exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds='chmod 644 $oldlib' + postlink_cmds='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. + old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc=no + hardcode_direct=no + hardcode_automatic=yes + hardcode_shlibpath_var=unsupported + if test "$lt_cv_ld_force_load" = "yes"; then + whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + + else + whole_archive_flag_spec='' + fi + link_all_deplibs=yes + allow_undefined_flag="$_lt_dar_allow_undefined" + case $cc_basename in + ifort*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test "$_lt_dar_can_shared" = "yes"; then + output_verbose_link_cmd=func_echo_all + archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + + else + ld_shlibs=no + fi + + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd* | dragonfly*) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9*) + if test "$GCC" = yes; then + archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + export_dynamic_flag_spec='${wl}-E' + ;; + + hpux10*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test "$with_gnu_ld" = no; then + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='${wl}-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + fi + ;; + + hpux11*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + case $host_cpu in + hppa*64*) + archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 +$as_echo_n "checking if $CC understands -b... " >&6; } +if ${lt_cv_prog_compiler__b+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler__b=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -b" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler__b=yes + fi + else + lt_cv_prog_compiler__b=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 +$as_echo "$lt_cv_prog_compiler__b" >&6; } + +if test x"$lt_cv_prog_compiler__b" = xyes; then + archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' +else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' +fi + + ;; + esac + fi + if test "$with_gnu_ld" = no; then + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct=no + hardcode_shlibpath_var=no + ;; + *) + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='${wl}-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if ${lt_cv_irix_exported_symbol+:} false; then : + $as_echo_n "(cached) " >&6 +else + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo (void) { return 0; } +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_irix_exported_symbol=yes +else + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS="$save_LDFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +$as_echo "$lt_cv_irix_exported_symbol" >&6; } + if test "$lt_cv_irix_exported_symbol" = yes; then + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' + fi + else + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + inherit_rpath=yes + link_all_deplibs=yes + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + hardcode_direct=yes + hardcode_shlibpath_var=no + hardcode_direct_absolute=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + else + case $host_os in + openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-R$libdir' + ;; + *) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + ;; + esac + fi + else + ld_shlibs=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = 
yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + archive_cmds_need_lc='no' + hardcode_libdir_separator=: + ;; + + solaris*) + no_undefined_flag=' -z defs' + if test "$GCC" = yes; then + wlarc='${wl}' + archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='${wl}' + archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but 
understands `-z linker_flag'. GCC discards it without `$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test "$GCC" = yes; then + whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + else + whole_archive_flag_spec='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag='${wl}-z,text' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ no_undefined_flag='${wl}-z,text' + allow_undefined_flag='${wl}-z,nodefs' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='${wl}-R,$libdir' + hardcode_libdir_separator=':' + link_all_deplibs=yes + export_dynamic_flag_spec='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac + + if test x$host_vendor = xsni; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec='${wl}-Blargedynsym' + ;; + esac + fi + fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 +$as_echo "$ld_shlibs" >&6; } +test "$ld_shlibs" = no && can_build_shared=no + +with_gnu_ld=$with_gnu_ld + + + + + + + + + + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } +if ${lt_cv_archive_cmds_need_lc+:} false; then : + $as_echo_n "(cached) " >&6 +else + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl + pic_flag=$lt_prog_compiler_pic + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 +$as_echo "$lt_cv_archive_cmds_need_lc" >&6; } + archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +$as_echo_n "checking dynamic linker characteristics... " >&6; } + +if test "$GCC" = yes; then + case $host_os in + darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; + *) lt_awk_arg="/^libraries:/" ;; + esac + case $host_os in + mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;; + *) lt_sed_strip_eq="s,=/,/,g" ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary. + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" + else + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS=" "; FS="/|\n";} { + lt_foo=""; + lt_count=0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo="/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[lt_foo]++; } + if (lt_freq[lt_foo] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's,/\([A-Za-z]:\),\1,g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib<name>.so + # instead of lib<name>.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + library_names_spec='${libname}.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) 
so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... + sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec="$LIB" + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=yes + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if ${lt_cv_shlibpath_overrides_runpath+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. 
+ hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
+ case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[89] | openbsd2.[89].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib 
/usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +$as_echo "$dynamic_linker" >&6; } +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then + sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +fi +if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then + sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +$as_echo_n "checking how to hardcode library paths into programs... " >&6; } +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || + test -n "$runpath_var" || + test "X$hardcode_automatic" = "Xyes" ; then + + # We can hardcode non-existent directories. + if test "$hardcode_direct" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no && + test "$hardcode_minus_L" != no; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ hardcode_action=unsupported +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 +$as_echo "$hardcode_action" >&6; } + +if test "$hardcode_action" = relink || + test "$inherit_rpath" = yes; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen="dlopen" + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +$as_echo_n "checking for dlopen in -ldl... " >&6; } +if ${ac_cv_lib_dl_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_dlopen=yes +else + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +$as_echo "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" +else + + lt_cv_dlopen="dyld" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + +fi + + ;; + + *) + ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" +if test "x$ac_cv_func_shl_load" = xyes; then : + lt_cv_dlopen="shl_load" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 +$as_echo_n "checking for shl_load in -ldld... " >&6; } +if ${ac_cv_lib_dld_shl_load+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char shl_load (); +int +main () +{ +return shl_load (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dld_shl_load=yes +else + ac_cv_lib_dld_shl_load=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 +$as_echo "$ac_cv_lib_dld_shl_load" >&6; } +if test "x$ac_cv_lib_dld_shl_load" = xyes; then : + lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld" +else + ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +if test "x$ac_cv_func_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +$as_echo_n "checking for dlopen in -ldl... " >&6; } +if ${ac_cv_lib_dl_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_dlopen=yes +else + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +$as_echo "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 +$as_echo_n "checking for dlopen in -lsvld... " >&6; } +if ${ac_cv_lib_svld_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_svld_dlopen=yes +else + ac_cv_lib_svld_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 +$as_echo "$ac_cv_lib_svld_dlopen" >&6; } +if test "x$ac_cv_lib_svld_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 +$as_echo_n "checking for dld_link in -ldld... " >&6; } +if ${ac_cv_lib_dld_dld_link+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dld_link (); +int +main () +{ +return dld_link (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dld_dld_link=yes +else + ac_cv_lib_dld_dld_link=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 +$as_echo "$ac_cv_lib_dld_dld_link" >&6; } +if test "x$ac_cv_lib_dld_dld_link" = xyes; then : + lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld" +fi + + +fi + + +fi + + +fi + + +fi + + +fi + + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 +$as_echo_n "checking whether a program can dlopen itself... " >&6; } +if ${lt_cv_dlopen_self+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + lt_cv_dlopen_self=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisbility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self=no + fi +fi +rm -fr conftest* + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 +$as_echo "$lt_cv_dlopen_self" >&6; } + + if test "x$lt_cv_dlopen_self" = xyes; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 +$as_echo_n "checking whether a statically linked program can dlopen itself... " >&6; } +if ${lt_cv_dlopen_self_static+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + lt_cv_dlopen_self_static=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisbility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self_static=no + fi +fi +rm -fr conftest* + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 +$as_echo "$lt_cv_dlopen_self_static" >&6; } + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + + + + + + + + + + + + + + + + + +striplib= +old_striplib= +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 +$as_echo_n "checking whether stripping libraries is possible... " >&6; } +if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else +# FIXME - insert some real tests, host_os isn't really good enough + case $host_os in + darwin*) + if test -n "$STRIP" ; then + striplib="$STRIP -x" + old_striplib="$STRIP -S" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + ;; + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + ;; + esac +fi + + + + + + + + + + + + + # Report which library types will actually be built + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +$as_echo_n "checking if libtool supports shared libraries... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +$as_echo "$can_build_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +$as_echo_n "checking whether to build shared libraries... " >&6; } + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[4-9]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +$as_echo "$enable_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +$as_echo_n "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. 
+ test "$enable_shared" = yes || enable_static=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +$as_echo "$enable_static" >&6; } + + + + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +CC="$lt_save_CC" + + + + + + + + + + + + + + + + ac_config_commands="$ac_config_commands libtool" + + + + +# Only expand once: + + +# By default we simply use the C compiler to build assembly code. + +test "${CCAS+set}" = set || CCAS=$CC +test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS + + + +depcc="$CCAS" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CCAS_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CCAS_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CCAS_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CCAS_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CCAS_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CCAS_dependencies_compiler_type" >&6; } +CCASDEPMODE=depmode=$am_cv_CCAS_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CCAS_dependencies_compiler_type" = gcc3; then + am__fastdepCCAS_TRUE= + am__fastdepCCAS_FALSE='#' +else + am__fastdepCCAS_TRUE='#' + am__fastdepCCAS_FALSE= +fi + + + + +# Check whether --with-dpdk was given. +if test "${with_dpdk+set}" = set; then : + withval=$with_dpdk; with_dpdk=1 +else + with_dpdk=0 +fi + + + +# Check whether --with-virl was given. +if test "${with_virl+set}" = set; then : + withval=$with_virl; with_virl=1 +else + with_virl=0 +fi + + + if test "$with_dpdk" = "1"; then + WITH_DPDK_TRUE= + WITH_DPDK_FALSE='#' +else + WITH_DPDK_TRUE='#' + WITH_DPDK_FALSE= +fi + +DPDK=-DDPDK=${with_dpdk} + + + if test "$with_virl" = "1"; then + WITH_VIRL_TRUE= + WITH_VIRL_FALSE='#' +else + WITH_VIRL_TRUE='#' + WITH_VIRL_FALSE= +fi + +VIRL=-DVIRL=${with_virl} + + +ac_config_files="$ac_config_files Makefile" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. 
+# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Transform confdefs.h into DEFS. +# Protect against shell expansion while executing Makefile rules. +# Protect against Makefile macro expansion. +# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. +ac_script=' +:mline +/\\$/{ + N + s,\\\n,, + b mline +} +t clear +:clear +s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g +t quote +s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g +t quote +b any +:quote +s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g +s/\[/\\&/g +s/\]/\\&/g +s/\$/$$/g +H +:any +${ + g + s/^\n// + s/\n/ /g + p +} +' +DEFS=`sed -n "$ac_script" confdefs.h` + + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. 
+ as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +$as_echo_n "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 +$as_echo "done" >&6; } + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCCAS_TRUE}" && test -z "${am__fastdepCCAS_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCCAS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_DPDK_TRUE}" && test -z "${WITH_DPDK_FALSE}"; then + as_fn_error $? "conditional \"WITH_DPDK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_VIRL_TRUE}" && test -z "${WITH_VIRL_FALSE}"; then + as_fn_error $? "conditional \"WITH_VIRL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. 
+if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. 
+as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by vnet $as_me 1.1, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Configuration commands: +$config_commands + +Report bugs to the package provider." 
+ +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +vnet config.status 1.1 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h | --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" + + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' +macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' +enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' +enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' +pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' +enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' +SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' +ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' +PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' +host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' +host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' +host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' +build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' +build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' +build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' +SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' +Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' +GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' +EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' +FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' +LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' +NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' +LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' +max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' +ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' +exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' +lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' +lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' +lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' +lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' +lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' +reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' +reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' +OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' +deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' +file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' +file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' +want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' +DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' +sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' +AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' +AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' +archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' +STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' +RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' +old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' +old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_cmds='`$ECHO 
"$old_archive_cmds" | $SED "$delay_single_quote_subst"`' +lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`' +CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' +CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' +compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' +GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' +nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' +lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' +objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' +MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' +need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' +MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' +DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' +NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' +LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' +OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' +OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' +libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' +shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' +extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' +compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' +module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' +with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED 
"$delay_single_quote_subst"`' +no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`' +hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' +hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' +inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' +link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' +always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' +exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' +include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' +prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' +postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' +file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' +variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' +need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' +need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' +version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' +runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' +libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' +library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' +soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`' +install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' +postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' +postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' +finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' +hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' +sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' +sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED "$delay_single_quote_subst"`' +hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' +enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' +old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' +striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' + +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' 
+compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in SHELL \ +ECHO \ +PATH_SEPARATOR \ +SED \ +GREP \ +EGREP \ +FGREP \ +LD \ +NM \ +LN_S \ +lt_SP2NL \ +lt_NL2SP \ +reload_flag \ +OBJDUMP \ +deplibs_check_method \ +file_magic_cmd \ +file_magic_glob \ +want_nocaseglob \ +DLLTOOL \ +sharedlib_from_linklib_cmd \ +AR \ +AR_FLAGS \ +archiver_list_spec \ +STRIP \ +RANLIB \ +CC \ +CFLAGS \ +compiler \ +lt_cv_sys_global_symbol_pipe \ +lt_cv_sys_global_symbol_to_cdecl \ +lt_cv_sys_global_symbol_to_c_name_address \ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ +nm_file_list_spec \ +lt_prog_compiler_no_builtin_flag \ +lt_prog_compiler_pic \ +lt_prog_compiler_wl \ +lt_prog_compiler_static \ +lt_cv_prog_compiler_c_o \ +need_locks \ +MANIFEST_TOOL \ +DSYMUTIL \ +NMEDIT \ +LIPO \ +OTOOL \ +OTOOL64 \ +shrext_cmds \ +export_dynamic_flag_spec \ +whole_archive_flag_spec \ +compiler_needs_object \ +with_gnu_ld \ +allow_undefined_flag \ +no_undefined_flag \ +hardcode_libdir_flag_spec \ +hardcode_libdir_separator \ +exclude_expsyms \ +include_expsyms \ +file_list_spec \ +variables_saved_for_relink \ +libname_spec \ +library_names_spec \ +soname_spec \ +install_override_mode \ +finish_eval \ +old_striplib \ +striplib; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. +for var in reload_cmds \ +old_postinstall_cmds \ +old_postuninstall_cmds \ +old_archive_cmds \ +extract_expsyms_cmds \ +old_archive_from_new_cmds \ +old_archive_from_expsyms_cmds \ +archive_cmds \ +archive_expsym_cmds \ +module_cmds \ +module_expsym_cmds \ +export_symbols_cmds \ +prelink_cmds \ +postlink_cmds \ +postinstall_cmds \ +postuninstall_cmds \ +finish_cmds \ +sys_lib_search_path_spec \ +sys_lib_dlsearch_path_spec; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +ac_aux_dir='$ac_aux_dir' +xsi_shell='$xsi_shell' +lt_shell_append='$lt_shell_append' + +# See if we are running on zsh, and set the options which allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + + + PACKAGE='$PACKAGE' + VERSION='$VERSION' + TIMESTAMP='$TIMESTAMP' + RM='$RM' + ofile='$ofile' + + + + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. 
+if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' <conf$$subs.awk | sed ' +/^[^""]/{ + N + s/\n// +} +' >>$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + + +eval set X " :F $CONFIG_FILES :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. 
$configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? 
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + case $CONFIG_FILES in + *\'*) eval set x "$CONFIG_FILES" ;; + *) set x $CONFIG_FILES ;; + esac + shift + for mf + do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named 'Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then + dirpart=`$as_dirname -- "$mf" || +$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$mf" : 'X\(//\)[^/]' \| \ + X"$mf" : 'X\(//\)$' \| \ + X"$mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running 'make'. 
+ DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "$am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`$as_dirname -- "$file" || +$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$file" : 'X\(//\)[^/]' \| \ + X"$file" : 'X\(//\)$' \| \ + X"$file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir=$dirpart/$fdir; as_fn_mkdir_p + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done + done +} + ;; + "libtool":C) + + # See if we are running on zsh, and set the options which allow our + # commands through without removal of \ escapes. + if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST + fi + + cfgfile="${ofile}T" + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL + +# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is part of GNU Libtool. +# +# GNU Libtool is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, or +# obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# The names of the tagged configurations supported by this script. +available_tags="" + +# ### BEGIN LIBTOOL CONFIG + +# Which release of libtool.m4 was used? +macro_version=$macro_version +macro_revision=$macro_revision + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. 
+build_old_libs=$enable_static + +# What type of objects to build. +pic_mode=$pic_mode + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# An echo program that protects backslashes. +ECHO=$lt_ECHO + +# The PATH separator for the build system. +PATH_SEPARATOR=$lt_PATH_SEPARATOR + +# The host system. +host_alias=$host_alias +host=$host +host_os=$host_os + +# The build system. +build_alias=$build_alias +build=$build +build_os=$build_os + +# A sed program that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="\$SED -e 1s/^X//" + +# A grep program that handles long lines. +GREP=$lt_GREP + +# An ERE matcher. +EGREP=$lt_EGREP + +# A literal string matcher. +FGREP=$lt_FGREP + +# A BSD- or MS-compatible name lister. +NM=$lt_NM + +# Whether we need soft or hard links. +LN_S=$lt_LN_S + +# What is the maximum length of a command? +max_cmd_len=$max_cmd_len + +# Object file suffix (normally "o"). +objext=$ac_objext + +# Executable file suffix (normally ""). +exeext=$exeext + +# whether the shell understands "unset". +lt_unset=$lt_unset + +# turn spaces into newlines. +SP2NL=$lt_lt_SP2NL + +# turn newlines into spaces. +NL2SP=$lt_lt_NL2SP + +# convert \$build file names to \$host format. +to_host_file_cmd=$lt_cv_to_host_file_cmd + +# convert \$build files to toolchain format. +to_tool_file_cmd=$lt_cv_to_tool_file_cmd + +# An object symbol dumper. +OBJDUMP=$lt_OBJDUMP + +# Method to check whether dependent libraries are shared objects. +deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method = "file_magic". +file_magic_cmd=$lt_file_magic_cmd + +# How to find potential files when deplibs_check_method = "file_magic". +file_magic_glob=$lt_file_magic_glob + +# Find potential files using nocaseglob when deplibs_check_method = "file_magic". +want_nocaseglob=$lt_want_nocaseglob + +# DLL creation program. +DLLTOOL=$lt_DLLTOOL + +# Command to associate shared and link libraries. +sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd + +# The archiver. +AR=$lt_AR + +# Flags to create an archive. +AR_FLAGS=$lt_AR_FLAGS + +# How to feed a file listing to the archiver. +archiver_list_spec=$lt_archiver_list_spec + +# A symbol stripping program. +STRIP=$lt_STRIP + +# Commands used to install an old-style archive. +RANLIB=$lt_RANLIB +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Whether to use a lock for old archive extraction. +lock_old_archive_extraction=$lock_old_archive_extraction + +# A C compiler. +LTCC=$lt_CC + +# LTCC compiler flags. +LTCFLAGS=$lt_CFLAGS + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe + +# Transform the output of nm in a proper C declaration. +global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl + +# Transform the output of nm in a C name address pair. +global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address + +# Transform the output of nm in a C name address pair when lib prefix is needed. +global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix + +# Specify filename containing input files for \$NM. +nm_file_list_spec=$lt_nm_file_list_spec + +# The root where to search for dependent libraries,and in which our libraries should be installed. 
+lt_sysroot=$lt_sysroot + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# Used to examine libraries when file_magic_cmd begins with "file". +MAGIC_CMD=$MAGIC_CMD + +# Must we lock files when doing compilation? +need_locks=$lt_need_locks + +# Manifest tool. +MANIFEST_TOOL=$lt_MANIFEST_TOOL + +# Tool to manipulate archived DWARF debug symbol files on Mac OS X. +DSYMUTIL=$lt_DSYMUTIL + +# Tool to change global to local symbols on Mac OS X. +NMEDIT=$lt_NMEDIT + +# Tool to manipulate fat objects and archives on Mac OS X. +LIPO=$lt_LIPO + +# ldd/readelf like tool for Mach-O binaries on Mac OS X. +OTOOL=$lt_OTOOL + +# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. +OTOOL64=$lt_OTOOL64 + +# Old archive suffix (normally "a"). +libext=$libext + +# Shared library suffix (normally ".so"). +shrext_cmds=$lt_shrext_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at link time. +variables_saved_for_relink=$lt_variables_saved_for_relink + +# Do we need the "lib" prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Library versioning type. +version_type=$version_type + +# Shared library runtime path variable. +runpath_var=$runpath_var + +# Shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Permission mode override for installation of shared libraries. +install_override_mode=$lt_install_override_mode + +# Command to use after installation of a shared archive. +postinstall_cmds=$lt_postinstall_cmds + +# Command to use after uninstallation of a shared archive. +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# As "finish_cmds", except a single script fragment to be evaled but +# not shown. +finish_eval=$lt_finish_eval + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Compile-time system search path for libraries. +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Run-time system search path for libraries. +sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Commands to strip libraries. +old_striplib=$lt_old_striplib +striplib=$lt_striplib + + +# The linker used to build libraries. +LD=$lt_LD + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds + +# A language specific compiler. +CC=$lt_compiler + +# Is the compiler the GNU compiler? 
+with_gcc=$GCC + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds +module_expsym_cmds=$lt_module_expsym_cmds + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \${shlibpath_var} if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. 
+export_symbols_cmds=$lt_export_symbols_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# ### END LIBTOOL CONFIG + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + +ltmain="$ac_aux_dir/ltmain.sh" + + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + sed '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + if test x"$xsi_shell" = xyes; then + sed -e '/^func_dirname ()$/,/^} # func_dirname /c\ +func_dirname ()\ +{\ +\ case ${1} in\ +\ */*) func_dirname_result="${1%/*}${2}" ;;\ +\ * ) func_dirname_result="${3}" ;;\ +\ esac\ +} # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_basename ()$/,/^} # func_basename /c\ +func_basename ()\ +{\ +\ func_basename_result="${1##*/}"\ +} # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\ +func_dirname_and_basename ()\ +{\ +\ case ${1} in\ +\ */*) func_dirname_result="${1%/*}${2}" ;;\ +\ * ) func_dirname_result="${3}" ;;\ +\ esac\ +\ func_basename_result="${1##*/}"\ +} # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_stripname ()$/,/^} # func_stripname /c\ +func_stripname ()\ +{\ +\ # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\ +\ # positional parameters, so assign one to ordinary parameter first.\ +\ func_stripname_result=${3}\ +\ func_stripname_result=${func_stripname_result#"${1}"}\ +\ func_stripname_result=${func_stripname_result%"${2}"}\ +} # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? 
|| _lt_function_replace_fail=: + + + sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\ +func_split_long_opt ()\ +{\ +\ func_split_long_opt_name=${1%%=*}\ +\ func_split_long_opt_arg=${1#*=}\ +} # Extended-shell func_split_long_opt implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\ +func_split_short_opt ()\ +{\ +\ func_split_short_opt_arg=${1#??}\ +\ func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\ +} # Extended-shell func_split_short_opt implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\ +func_lo2o ()\ +{\ +\ case ${1} in\ +\ *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\ +\ *) func_lo2o_result=${1} ;;\ +\ esac\ +} # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_xform ()$/,/^} # func_xform /c\ +func_xform ()\ +{\ + func_xform_result=${1%.*}.lo\ +} # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_arith ()$/,/^} # func_arith /c\ +func_arith ()\ +{\ + func_arith_result=$(( $* ))\ +} # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_len ()$/,/^} # func_len /c\ +func_len ()\ +{\ + func_len_result=${#1}\ +} # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + +fi + +if test x"$lt_shell_append" = xyes; then + sed -e '/^func_append ()$/,/^} # func_append /c\ +func_append ()\ +{\ + eval "${1}+=\\${2}"\ +} # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\ +func_append_quoted ()\ +{\ +\ func_quote_for_eval "${2}"\ +\ eval "${1}+=\\\\ \\$func_quote_for_eval_result"\ +} # Extended-shell func_append_quoted implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + # Save a `func_append' function call where possible by direct use of '+=' + sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? 
|| _lt_function_replace_fail=: +else + # Save a `func_append' function call even when '+=' is not available + sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +fi + +if test x"$_lt_function_replace_fail" = x":"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute extended shell functions in $ofile" >&5 +$as_echo "$as_me: WARNING: Unable to substitute extended shell functions in $ofile" >&2;} +fi + + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" + + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/vnet/configure.ac b/vnet/configure.ac new file mode 100644 index 00000000000..a570389e3fb --- /dev/null +++ b/vnet/configure.ac @@ -0,0 +1,26 @@ +AC_INIT(vnet, 1.1) +AM_INIT_AUTOMAKE +AM_SILENT_RULES + +AC_PROG_CC +AM_PROG_CC_C_O +AC_PROG_LIBTOOL +AM_PROG_AS + +AC_ARG_WITH(dpdk, + AC_HELP_STRING([--with-dpdk],[Use the Intel dpdk]), + [with_dpdk=1], + [with_dpdk=0]) + +AC_ARG_WITH(virl, + AC_HELP_STRING([--with-virl],[Compile for virl citizenship]), + [with_virl=1], + [with_virl=0]) + +AM_CONDITIONAL(WITH_DPDK, test "$with_dpdk" = "1") +AC_SUBST(DPDK,[-DDPDK=${with_dpdk}]) + +AM_CONDITIONAL(WITH_VIRL, test "$with_virl" = "1") +AC_SUBST(VIRL,[-DVIRL=${with_virl}]) + +AC_OUTPUT([Makefile]) diff --git a/vnet/depcomp b/vnet/depcomp new file mode 100755 index 00000000000..4ebd5b3a2f2 --- /dev/null +++ b/vnet/depcomp @@ -0,0 +1,791 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2013-05-30.07; # UTC + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>. + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to <bug-automake@gnu.org>. +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variables '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. +# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. +nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. +upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. 
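For illustration only (a sketch assuming the default DEPDIR of ".deps"; the variable names come from the script just below), the substitutions that follow derive the dependency file names from $object like so:

    object=sub/bar.o    =>  depfile=sub/.deps/bar.Po,  tmpdepfile=sub/.deps/bar.TPo
    object=sub/bar.obj  =>  depfile=sub/.deps/bar.Po   (the trailing "Pobj" is rewritten to "Po")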
+depfile=${depfile-`echo "$object" |
+ sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Avoid interferences from the environment.
+gccflag= dashmflag=
+
+# Some modes work just like other modes, but use different flags. We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write. Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+ # HP compiler uses -M and no extra arg.
+ gccflag=-M
+ depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+ # This is just like dashmstdout with a different argument.
+ dashmflag=-xM
+ depmode=dashmstdout
+fi
+
+cygpath_u="cygpath -u -f -"
+if test "$depmode" = msvcmsys; then
+ # This is just like msvisualcpp but w/o cygpath translation.
+ # Just convert the backslash-escaped backslashes to single forward
+ # slashes to satisfy depend.m4
+ cygpath_u='sed s,\\\\,/,g'
+ depmode=msvisualcpp
+fi
+
+if test "$depmode" = msvc7msys; then
+ # This is just like msvc7 but w/o cygpath translation.
+ # Just convert the backslash-escaped backslashes to single forward
+ # slashes to satisfy depend.m4
+ cygpath_u='sed s,\\\\,/,g'
+ depmode=msvc7
+fi
+
+if test "$depmode" = xlc; then
+ # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
+ gccflag=-qmakedep=gcc,-MF
+ depmode=gcc
+fi
+
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want. Yay! Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff. Hmm.
+## Unfortunately, FreeBSD c89 acceptance of flags depends upon
+## the command line argument order; so add the flags where they
+## appear in depend2.am. Note that the slowdown incurred here
+## affects only configure: in makefiles, %FASTDEP% shortcuts this.
+ for arg
+ do
+ case $arg in
+ -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
+ *) set fnord "$@" "$arg" ;;
+ esac
+ shift # fnord
+ shift # $arg
+ done
+ "$@"
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ mv "$tmpdepfile" "$depfile"
+ ;;
+
+gcc)
+## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
+## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
+## (see the conditional assignment to $gccflag above).
+## There are various ways to get dependency output from gcc. Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+## up in a subdir. Having to rename by hand is ugly.
+## (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+## -MM, not -M (despite what the docs say). Also, it might not be
+## supported by the other compilers which use the 'gcc' depmode.
+## - Using -M directly means running the compiler twice (even worse
+## than renaming).
+ if test -z "$gccflag"; then
+ gccflag=-MD,
+ fi
+ "$@" -Wp,"$gccflag$tmpdepfile"
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ # The second -e expression handles DOS-style file names with drive
+ # letters.
+ sed -e 's/^[^:]*: / /' \
+ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the "deleted header file" problem.
+## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. 
+ # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. + # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. + sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. + # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. + "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. + while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. 
The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. + sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. + aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? + grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. 
This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. + "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. + sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. 
+ IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/vnet/etc/scripts/arp4 b/vnet/etc/scripts/arp4 new file mode 100644 index 00000000000..acb20da33c8 --- /dev/null +++ b/vnet/etc/scripts/arp4 @@ -0,0 +1,21 @@ +packet-generator new { + name x + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + ICMP: 1.0.0.2 -> 2.0.0.2 + ICMP echo_request + incrementing 100 + } +} + +trace add pg-input 100 +loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 1.0.0.1/24 +set int ip address loop1 2.0.0.1/24 diff --git a/vnet/etc/scripts/arp6 b/vnet/etc/scripts/arp6 new file mode 100644 index 00000000000..e6a98935309 --- /dev/null +++ b/vnet/etc/scripts/arp6 @@ -0,0 +1,21 @@ +packet-generator new { + name x + limit 1 + node ip6-input + size 64-64 + no-recycle + data { + ICMP6: 2000::2 -> 2001::2 + ICMP echo_request + incrementing 100 + } +} + +tr add pg-input 100 +loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 2000::1/64 +set int ip address loop1 2001::1/64 diff --git a/vnet/etc/scripts/bvi b/vnet/etc/scripts/bvi new file mode 100644 index 00000000000..2174da0db63 --- /dev/null +++ b/vnet/etc/scripts/bvi @@ -0,0 +1,76 @@ + + +set int state tuntap-0 down +set int ip address GigabitEthernet2/1/0 1.2.3.4/24 + +set int state GigabitEthernet2/1/0 up +cre sub GigabitEthernet2/1/0 1 dot1q 7 +set int state GigabitEthernet2/1/0.1 up + +set int state GigabitEthernet2/2/0 up +cre sub GigabitEthernet2/2/0 1 dot1q 9 +set int state GigabitEthernet2/2/0.1 up + + +loop create +set int l2 bridge 
loop0 0 bvi +set int ip table loop0 0 +set int state loop0 up + + +set int l2 bridge GigabitEthernet2/1/0.1 0 +set int l2 bridge GigabitEthernet2/2/0.1 0 + +set int l2 tag-rewrite GigabitEthernet2/1/0.1 pop 1 +set int l2 tag-rewrite GigabitEthernet2/2/0.1 pop 1 + +l2fib add 00:22:44:06:08:0a 0 GigabitEthernet2/1/0.1 static +l2fib add 00:02:04:06:08:0a 0 GigabitEthernet2/2/0.1 static + + +ip route table 0 8.0.0.1/32 via loop0 +set ip arp loop0 8.0.0.1 00:02:04:06:08:0a + + +ip route add 1.2.3.3/32 via GigabitEthernet2/1/0 IP4: 00:15:17:61:73:47 -> 00:15:17:61:73:46 + +cle er +cle int +cle run + +packet-generator new { + name bvi_to_l2 + limit 100 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> 0050.56b7.296d + GRE: 1.2.3.3 -> 8.0.0.1 mpls_unicast + } +} + +packet-generator new { + name l2_to_bvi + limit 50 + no-recycle + node ethernet-input + interface GigabitEthernet2/2/0 + data { + IP4: 0050.56b7.7c83 -> dead.0000.0000 vlan 9 + GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast + } +} + +packet-generator new { + name l2_to_bvi_via_flood + limit 25 + no-recycle + node ethernet-input + interface GigabitEthernet2/2/0 + data { + IP4: 0050.56b7.7c83 -> ffff.ffff.ffff vlan 9 + GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast + } +} + diff --git a/vnet/etc/scripts/dhcp/ludd-cluster-1 b/vnet/etc/scripts/dhcp/ludd-cluster-1 new file mode 100644 index 00000000000..458eecf0caf --- /dev/null +++ b/vnet/etc/scripts/dhcp/ludd-cluster-1 @@ -0,0 +1,17 @@ +set int ip table GigabitEthernet2/2/0 12 +set int ip address GigabitEthernet2/2/0 192.168.1.1/24 +set int state GigabitEthernet2/2/0 up + +set int ip table GigabitEthernet2/7/0 11 +set int ip address GigabitEthernet2/7/0 192.168.2.1/24 +set int state GigabitEthernet2/7/0 up + +comment { set dhcp proxy server 1.2.3.4 src-address 1.2.3.5 add-option-82 rx-fib-id 0 server-fib-id 0 } + +comment { set dhcp proxy server 192.168.2.2 src-address 192.168.2.1 add-option-82 rx-fib-id 12 server-fib-id 11 } + +ip route add 0.0.0.0/24 table 11 via local +ip route add 255.255.255.255/24 table 11 via local + +ip route add 0.0.0.0/24 table 12 via local +ip route add 255.255.255.255/24 table 12 via local diff --git a/vnet/etc/scripts/dhcp/ludd-cluster-2-sh b/vnet/etc/scripts/dhcp/ludd-cluster-2-sh new file mode 100644 index 00000000000..32924ecc5e5 --- /dev/null +++ b/vnet/etc/scripts/dhcp/ludd-cluster-2-sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# to obtain dhcp address from ludd-cluster-1 +dhclient -d -v eth1 diff --git a/vnet/etc/scripts/dhcp/ludd-cluster-5-dhcpd.conf b/vnet/etc/scripts/dhcp/ludd-cluster-5-dhcpd.conf new file mode 100644 index 00000000000..d4cb3ed718a --- /dev/null +++ b/vnet/etc/scripts/dhcp/ludd-cluster-5-dhcpd.conf @@ -0,0 +1,8 @@ +# add at the bottom + +subnet 192.168.0.0 netmask 255.255.0.0 { + range 192.168.1.10 192.168.1.254; + option routers 192.168.1.1; + default-lease-time 15; + max-lease-time 15; +} diff --git a/vnet/etc/scripts/dhcp/ludd-cluster-5-isc-dhcp-server b/vnet/etc/scripts/dhcp/ludd-cluster-5-isc-dhcp-server new file mode 100644 index 00000000000..6fafc1eec13 --- /dev/null +++ b/vnet/etc/scripts/dhcp/ludd-cluster-5-isc-dhcp-server @@ -0,0 +1,3 @@ +# add eth6 to interfaces + +INTERFACES="eth6" diff --git a/vnet/etc/scripts/icmp b/vnet/etc/scripts/icmp new file mode 100644 index 00000000000..1e054e2df4a --- /dev/null +++ b/vnet/etc/scripts/icmp @@ -0,0 +1,16 @@ +packet-generator new { + name x + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + ICMP: 1.2.3.4 -> 5.6.7.8 + ICMP echo_request + 
incrementing 100 + } +} + +tr add pg-input 100 +ip route 5.6.7.8/32 via local +ip route 1.2.3.4/32 via local diff --git a/vnet/etc/scripts/icmp6 b/vnet/etc/scripts/icmp6 new file mode 100644 index 00000000000..2a65acba256 --- /dev/null +++ b/vnet/etc/scripts/icmp6 @@ -0,0 +1,16 @@ +packet-generator new { + name x + limit 1 + node ip6-input + size 64-64 + no-recycle + data { + ICMP6: ::1 -> ::2 + ICMP echo_request + incrementing 100 + } +} + +tr add pg-input 100 +ip route ::1/128 via local +ip route ::2/128 via local diff --git a/vnet/etc/scripts/ige b/vnet/etc/scripts/ige new file mode 100644 index 00000000000..80d045af36d --- /dev/null +++ b/vnet/etc/scripts/ige @@ -0,0 +1,19 @@ +packet-generator new { + name x + limit 1 + node ip4-lookup + size 50-50 + data { + ICMP: 1.0.0.1 -> 1.0.0.3 ttl 1 + incrementing 30 + } +} + +comment { tr add pg-input 100 } +set int ip address GigabitEthernet4/0/0 1.0.0.1/24 +set int ip address GigabitEthernet4/0/1 1.0.0.2/24 +set int state GigabitEthernet4/0/0 up +set int state GigabitEthernet4/0/1 up + +ip route add 1.0.0.3/32 via GigabitEthernet4/0/1 IP4: 00:15:17:61:73:47 -> 00:15:17:61:73:46 +tr add ige-input 10 diff --git a/vnet/etc/scripts/ip6 b/vnet/etc/scripts/ip6 new file mode 100644 index 00000000000..4f9f3ee5474 --- /dev/null +++ b/vnet/etc/scripts/ip6 @@ -0,0 +1,15 @@ +packet-generator new { + name x + limit 1 + node ethernet-input + size 64-64 + no-recycle + data { + IP6: 1.2.3 -> 4.5.6 + ICMP: ::1 -> ::2 + ICMP echo_request + incrementing 100 + } +} + +tr add pg-input 100 diff --git a/vnet/etc/scripts/ixge b/vnet/etc/scripts/ixge new file mode 100644 index 00000000000..6722b5369cd --- /dev/null +++ b/vnet/etc/scripts/ixge @@ -0,0 +1,15 @@ +packet-generator new { + name x + limit 1 + node ip4-lookup + size 50-50 + data { + ICMP: 1.0.0.1 -> 1.0.0.3 ttl 1 + incrementing 30 + } +} + +comment { tr add pg-input 100 } +set int ip address TenGigabitEthernet5/0/0 33.0.1.1/8 +set int state TenGigabitEthernet5/0/0 up + diff --git a/vnet/etc/scripts/l2efpfilter b/vnet/etc/scripts/l2efpfilter new file mode 100644 index 00000000000..307b4436e81 --- /dev/null +++ b/vnet/etc/scripts/l2efpfilter @@ -0,0 +1,83 @@ + +set int ip address GigabitEthernet2/1/0 1.2.3.4/24 +set int state GigabitEthernet2/1/0 up +set int state tuntap-0 down + + +cre sub GigabitEthernet2/1/0 1 dot1q 1 +cre sub GigabitEthernet2/1/0 2 dot1q 2 +cre sub GigabitEthernet2/1/0 3 dot1q 3 +cre sub GigabitEthernet2/2/0 1 dot1q 1 +cre sub GigabitEthernet2/2/0 100 dot1q 100 + +set int l2 bridge GigabitEthernet2/1/0 0 +set int l2 bridge GigabitEthernet2/1/0.1 0 +set int l2 bridge GigabitEthernet2/1/0.2 0 +set int l2 bridge GigabitEthernet2/1/0.3 0 +set int l2 bridge GigabitEthernet2/2/0 0 +set int l2 bridge GigabitEthernet2/2/0.1 0 +set int l2 bridge GigabitEthernet2/2/0.100 0 + +set int l2 tag-rewrite GigabitEthernet2/2/0.1 push dot1q 50 +set int l2 tag-rewrite GigabitEthernet2/1/0.2 translate 1-1 dot1q 100 +set int l2 tag-rewrite GigabitEthernet2/1/0.3 translate 1-1 dot1q 99 + +set int l2 efp-filter GigabitEthernet2/2/0 +set int l2 efp-filter GigabitEthernet2/2/0.1 +set int l2 efp-filter GigabitEthernet2/2/0.100 + + +l2fib add 00:00:00:00:00:11 0 GigabitEthernet2/2/0.1 static +l2fib add 00:00:00:00:00:22 0 GigabitEthernet2/2/0.100 static + +set int state GigabitEthernet2/1/0 up +set int state GigabitEthernet2/1/0.1 up +set int state GigabitEthernet2/1/0.2 up +set int state GigabitEthernet2/1/0.3 up +set int state GigabitEthernet2/2/0 up +set int state GigabitEthernet2/2/0.1 up +set int state 
GigabitEthernet2/2/0.100 up + + +trace add pg-input 6 + +clear error +clear run +clear int + +packet-generator new { + name pre_vtr_fail + limit 10 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> 00:00:00:00:00:11 vlan 1 + GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast + } +} + +packet-generator new { + name post_vtr_pass + limit 20 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 2 + GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast + } +} + +packet-generator new { + name post_vtr_fail + limit 50 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 3 + GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast + } +} + diff --git a/vnet/etc/scripts/l2efpfilter_perf b/vnet/etc/scripts/l2efpfilter_perf new file mode 100644 index 00000000000..b2f4b490cb8 --- /dev/null +++ b/vnet/etc/scripts/l2efpfilter_perf @@ -0,0 +1,58 @@ + +set int ip address GigabitEthernet2/1/0 1.2.3.4/24 +set int state GigabitEthernet2/1/0 up +set int state tuntap-0 down + + +cre sub GigabitEthernet2/1/0 1 dot1q 1 +cre sub GigabitEthernet2/1/0 2 dot1q 2 +cre sub GigabitEthernet2/1/0 3 dot1q 3 +cre sub GigabitEthernet2/2/0 1 dot1q 1 +cre sub GigabitEthernet2/2/0 100 dot1q 100 + +set int l2 bridge GigabitEthernet2/1/0 0 +set int l2 bridge GigabitEthernet2/1/0.1 0 +set int l2 bridge GigabitEthernet2/1/0.2 0 +set int l2 bridge GigabitEthernet2/1/0.3 0 +set int l2 bridge GigabitEthernet2/2/0 0 +set int l2 bridge GigabitEthernet2/2/0.1 0 +set int l2 bridge GigabitEthernet2/2/0.100 0 + +set int l2 tag-rewrite GigabitEthernet2/2/0.1 push dot1q 50 +set int l2 tag-rewrite GigabitEthernet2/1/0.2 translate 1-1 dot1q 100 +set int l2 tag-rewrite GigabitEthernet2/1/0.3 translate 1-1 dot1q 99 + +set int l2 efp-filter GigabitEthernet2/2/0 +set int l2 efp-filter GigabitEthernet2/2/0.1 +set int l2 efp-filter GigabitEthernet2/2/0.100 + + +l2fib add 00:00:00:00:00:11 0 GigabitEthernet2/2/0.1 static +l2fib add 00:00:00:00:00:22 0 GigabitEthernet2/2/0.100 static + +set int state GigabitEthernet2/1/0 up +set int state GigabitEthernet2/1/0.1 up +set int state GigabitEthernet2/1/0.2 up +set int state GigabitEthernet2/1/0.3 up +set int state GigabitEthernet2/2/0 up +set int state GigabitEthernet2/2/0.1 up +set int state GigabitEthernet2/2/0.100 up + + +trace add pg-input 6 + +clear error +clear run +clear int + +packet-generator new { + name post_vtr_pass + limit 9111003 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 2 + GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast + } +} diff --git a/vnet/etc/scripts/l2fib b/vnet/etc/scripts/l2fib new file mode 100644 index 00000000000..81ede171ef2 --- /dev/null +++ b/vnet/etc/scripts/l2fib @@ -0,0 +1,46 @@ + +set int ip address GigabitEthernet2/1/0 1.2.3.4/24 +set int state GigabitEthernet2/1/0 up + +set int state GigabitEthernet2/2/0 up +cre sub GigabitEthernet2/2/0 1 dot1q 9 +set int state GigabitEthernet2/2/0.1 up +set int state tuntap-0 down + +set int acl input GigabitEthernet2/1/0 +set int acl output GigabitEthernet2/1/0 +set int acl input GigabitEthernet2/2/0.1 +set int acl output GigabitEthernet2/2/0.1 + +set int l2 bridge GigabitEthernet2/1/0 0 +set int l2 bridge GigabitEthernet2/2/0.1 0 + +set int l2 tag-rewrite GigabitEthernet2/1/0 push dot1q 50 +set int l2 tag-rewrite GigabitEthernet2/2/0.1 pop 1 + + +trace add pg-input 6 + +clear error +clear run +clear int + 
+packet-generator new { + name new_input_if_index_mac_move + limit 4 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + size 98-98 + data { hex 0x00010203040500020406080a080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff } +} + +packet-generator new { + name dmac_hit + limit 7 + no-recycle + node ethernet-input + interface GigabitEthernet2/2/0 + size 98-98 + data { hex 0x00020406080a00224406080a8100000981000011080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff } +} diff --git a/vnet/etc/scripts/l2fib_perf b/vnet/etc/scripts/l2fib_perf new file mode 100644 index 00000000000..638317ff705 --- /dev/null +++ b/vnet/etc/scripts/l2fib_perf @@ -0,0 +1,29 @@ + +set int ip address GigabitEthernet2/1/0 1.2.3.4/24 +set int state GigabitEthernet2/1/0 up + +set int state GigabitEthernet2/2/0 up +cre sub GigabitEthernet2/2/0 1 dot1q 9 +set int state GigabitEthernet2/2/0.1 up +set int state tuntap-0 down + +set int l2 bridge GigabitEthernet2/1/0 0 +set int l2 bridge GigabitEthernet2/2/0.1 0 + +l2fib add 00:22:44:06:08:0a 0 GigabitEthernet2/1/0 static +l2fib add 00:02:04:06:08:0a 0 GigabitEthernet2/2/0.1 static + +cle er +cle int +cle run + +packet-generator new { + name perf + limit 9111003 + no-recycle + node ethernet-input + interface GigabitEthernet2/2/0.1 + size 98-98 + data { hex 0x00224406080a00020406080a81000009080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff } +} + diff --git a/vnet/etc/scripts/l2fib_xc b/vnet/etc/scripts/l2fib_xc new file mode 100644 index 00000000000..35d7342b3f7 --- /dev/null +++ b/vnet/etc/scripts/l2fib_xc @@ -0,0 +1,31 @@ + +set int ip address GigabitEthernet2/1/0 1.2.3.4/24 +set int state GigabitEthernet2/1/0 up + +set int state GigabitEthernet2/2/0 up +cre sub GigabitEthernet2/2/0 1 dot1q 9 +set int state GigabitEthernet2/2/0.1 up +set int state tuntap-0 down + +set int acl input GigabitEthernet2/1/0 +set int acl output GigabitEthernet2/1/0 +set int acl input GigabitEthernet2/2/0.1 +set int acl output GigabitEthernet2/2/0.1 + +set int l2 xc GigabitEthernet2/1/0 GigabitEthernet2/2/0.1 +set int l2 xc GigabitEthernet2/2/0.1 GigabitEthernet2/1/0 + +clear error +clear run +clear int + +packet-generator new { + name xc + limit 11 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + size 98-98 + data { hex 0x00010203040500020406080a080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff } +} + diff --git a/vnet/etc/scripts/l2flood b/vnet/etc/scripts/l2flood new file mode 100644 index 00000000000..013462cef64 --- /dev/null +++ b/vnet/etc/scripts/l2flood @@ -0,0 +1,42 @@ + +set int ip address GigabitEthernet2/1/0 1.2.3.4/24 +set int state GigabitEthernet2/1/0 up +set int state tuntap-0 down + + +loop create +cre sub loop0 1 dot1q 1 +cre sub loop0 2 dot1q 2 +cre sub loop0 3 dot1q 3 +cre sub GigabitEthernet2/1/0 1 dot1q 1 + + +set int l2 bridge loop0.1 7 +set int l2 bridge loop0.2 7 +set int l2 bridge loop0.3 7 +set int l2 bridge GigabitEthernet2/1/0.1 7 + +loop cre +set int l2 bridge loop1 7 bvi + +set int state GigabitEthernet2/1/0.1 up + +trace add pg-input 6 + +clear error +clear run +clear int + +packet-generator new { + name flood + limit 1 + no-recycle 
+ node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> ffff.ffff.ffff vlan 1 + GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast + } + +} + diff --git a/vnet/etc/scripts/l2tp b/vnet/etc/scripts/l2tp new file mode 100644 index 00000000000..337805aa44c --- /dev/null +++ b/vnet/etc/scripts/l2tp @@ -0,0 +1,134 @@ + +set int ip address GigabitEthernet2/1/0 1.2.3.4/24 +set int state GigabitEthernet2/1/0 up +set int state tuntap-0 down + + +cre sub GigabitEthernet2/1/0 1 dot1q 1 +cre sub GigabitEthernet2/1/0 2 dot1q 2 +cre sub GigabitEthernet2/1/0 3 dot1q 3 +cre sub GigabitEthernet2/2/0 1 dot1q 1 +cre sub GigabitEthernet2/2/0 100 dot1q 100 + + +set int l2 tag-rewrite GigabitEthernet2/1/0.1 pop 1 +set int l2 tag-rewrite GigabitEthernet2/1/0.2 pop 1 + + +l2tp session add client 11::1 our 22::2 l2-interface GigabitEthernet2/1/0.1 +l2tp session add client 11::1 our 22::3 l2-interface GigabitEthernet2/1/0.2 local-session-id 2 l2-sublayer-present + +ip route 11::1/128 via GigabitEthernet2/2/0 +set ip6 neighbor GigabitEthernet2/2/0 11::1 00:02:04:06:08:0a + +enable ip6 interface GigabitEthernet2/2/0 + +set int ip6 l2tpv3 GigabitEthernet2/2/0 + +set int state GigabitEthernet2/1/0 up +set int state GigabitEthernet2/1/0.1 up +set int state GigabitEthernet2/1/0.2 up +set int state GigabitEthernet2/1/0.3 up +set int state GigabitEthernet2/2/0 up + + +trace add pg-input 2 + +clear error +clear run +clear int + +packet-generator new { + name decap + limit 10 + size 200-200 + no-recycle + node ethernet-input + interface GigabitEthernet2/2/0 + data { + IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a + L2TP: 11::1 -> 22::2 + L2TP: session_id 1 cookie 0xffffffffffffffff + IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01 + UDP: 1.2.3.4 -> 5.6.7.8 + incrementing 8 + } +} + +packet-generator new { + name decap_bad_sid + limit 30 + size 200-200 + no-recycle + node ethernet-input + interface GigabitEthernet2/2/0 + data { + IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a + L2TP: 11::1 -> 22::2 + L2TP: session_id 0x999 cookie 0xffffffffffffffff + IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01 + UDP: 1.2.3.4 -> 5.6.7.8 + incrementing 8 + } +} + +packet-generator new { + name decap_bad_cookie + limit 50 + size 200-200 + no-recycle + node ethernet-input + interface GigabitEthernet2/2/0 + data { + IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a + L2TP: 11::1 -> 22::2 + L2TP: session_id 1 cookie 0x3333ffffffffffff + IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01 + UDP: 1.2.3.4 -> 5.6.7.8 + incrementing 8 + } +} + +packet-generator new { + name encap + limit 100 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0000.5555.0002 -> 00:00:dd:dd:00:02 vlan 1 + UDP: 1.2.3.4 -> 5.6.7.8 + incrementing 8 + } +} + +packet-generator new { + name decap_sublayer + limit 300 + size 200-200 + no-recycle + node ethernet-input + interface GigabitEthernet2/2/0 + data { + IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a + L2TP: 11::1 -> 22::3 + L2TP: session_id 2 cookie 0xffffffffffffffff l2_sublayer 0 + IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01 + UDP: 1.2.3.4 -> 5.6.7.8 + incrementing 8 + } +} + +packet-generator new { + name encap_sublayer + limit 700 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0000.5555.0002 -> 00:00:dd:dd:00:02 vlan 2 + UDP: 1.2.3.4 -> 5.6.7.8 + incrementing 8 + } +} + diff --git a/vnet/etc/scripts/ludd-cluster-1/lc1.script b/vnet/etc/scripts/ludd-cluster-1/lc1.script new file mode 100644 index 00000000000..f08c809012c --- /dev/null +++ 
b/vnet/etc/scripts/ludd-cluster-1/lc1.script @@ -0,0 +1,9 @@ +l2tp_set_lookup_key lookup_v6_src + +sw_interface_add_del_address GigabitEthernet2/3/0 db03::2/64 +sw_interface_set_flags GigabitEthernet2/3/0 admin-up + +comment sw_interface_add_del_address GigabitEthernet2/2/0 db02::2/64 +sw_interface_set_flags GigabitEthernet2/2/0 admin-up + +l2tp_session_add_del client_address db03::1 our_address db03::2 GigabitEthernet2/2/0 local_session_id 1 remote_session_id 3 local_cookie 11 remote_cookie 33 diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classify b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classify new file mode 100755 index 00000000000..74285912aeb --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classify @@ -0,0 +1,8 @@ +classify table mask l2 src l3 ip4 src buckets 2 miss-next local + +comment { classify table mask l3 ip4 src buckets 2 miss-next local } +set ip classify intfc GigabitEthernet2/2/0 table-index 0 +set int ip address GigabitEthernet2/2/0 192.168.1.1/24 +set int state GigabitEthernet2/2/0 up + +classify session hit-next local table-index 0 match l2 src 00:50:56:b7:05:bb l3 ip4 src 192.168.1.2 diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classify6 b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classify6 new file mode 100644 index 00000000000..6579d50d3c5 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classify6 @@ -0,0 +1,5 @@ +classify table mask l3 ip6 src buckets 2 miss-next local +set ip6 classify intfc GigabitEthernet2/2/0 table-index 0 +set int ip address GigabitEthernet2/2/0 db01::1/64 +set int state GigabitEthernet2/2/0 up +classify session hit-next local table-index 0 match l3 ip6 src db01::2 diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classifyl2 b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classifyl2 new file mode 100644 index 00000000000..6be4b1e52cf --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-classifyl2 @@ -0,0 +1,8 @@ +set int ip address GigabitEthernet2/2/0 192.168.1.1/24 +set int state GigabitEthernet2/2/0 up + +classify table mask l3 ip4 src buckets 2 l2-miss-next ethernet + +classify session advance 14 l2-hit-next ip4 table-index 0 match l3 ip4 src 192.168.1.2 + +set int l2 class intfc GigabitEthernet2/2/0 ip4-table 0 diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-dhcp b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-dhcp new file mode 100644 index 00000000000..c13a8f3aa5e --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-dhcp @@ -0,0 +1,23 @@ +loop cre +set int ip table loop0 12 +set int ip address loop0 192.168.1.1/24 +set int state loop0 up + +set int ip table GigabitEthernet2/2/0 12 +comment { set int ip address GigabitEthernet2/2/0 192.168.1.1/24 } +set int unnumbered GigabitEthernet2/2/0 use loop0 +set int state GigabitEthernet2/2/0 up + +set int ip table GigabitEthernet2/7/0 11 +set int ip address GigabitEthernet2/7/0 192.168.2.1/24 +set int state GigabitEthernet2/7/0 up + +uncomment { set dhcp proxy server 1.2.3.4 src-address 1.2.3.5 add-option-82 rx-fib-id 0 server-fib-id 0 } + +uncomment { set dhcp proxy server 192.168.2.2 src-address 192.168.2.1 add-option-82 rx-fib-id 12 server-fib-id 11 } + +ip route add 0.0.0.0/24 table 11 via local +ip route add 255.255.255.255/24 table 11 via local + +ip route add 0.0.0.0/24 table 12 via local +ip route add 255.255.255.255/24 table 12 via local diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-ioam.conf 
b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-ioam.conf new file mode 100644 index 00000000000..5234ace39bf --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-ioam.conf @@ -0,0 +1,15 @@ +comment { connects to ludd-cluster-2 eth1 addr db02::2/64 } +set int ip address GigabitEthernet2/2/0 db02::1/64 +set int state GigabitEthernet2/2/0 up + +comment { connects to ioam domain ludd-cluster-3 eth2 addr db03::3/64 } +set int ip address GigabitEthernet2/3/0 db03::1/64 +set int state GigabitEthernet2/3/0 up + +ioam set rewrite trace-elts 2 pow + +set ip6 neighbor GigabitEthernet2/3/0 db03::3 00:50:56:b7:05:cb + +ip route add db04::0/64 via db03::3 + +ioam set destination db04::0/64 add diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-l3vxlan.conf b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-l3vxlan.conf new file mode 100644 index 00000000000..ed43059dea9 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-l3vxlan.conf @@ -0,0 +1,12 @@ +comment { tunnel to ludd-cluster-3 6.0.3.3 on vlan 101 } + +set int ip address GigabitEthernet2/3/0 6.0.3.1/24 +set int state GigabitEthernet2/3/0 up + +comment { configure lc2 eth1 at e.g. 6.0.2.2/24 } + +set int ip address GigabitEthernet2/2/0 6.0.2.1/24 +set int state GigabitEthernet2/2/0 up + +vxlan tunnel src 6.0.3.1 peer 6.0.3.3 vni 123 adj 6.0.4.4/24 + diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-lisp.conf b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-lisp.conf new file mode 100644 index 00000000000..15a6ccb3b24 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-lisp.conf @@ -0,0 +1,18 @@ +comment { vpe_phase2 configuration } + +comment { local client facing interface on vlan 100 } +comment { configure lc2 eth1 at e.g. 6.0.2.2/24 } + +set int ip address GigabitEthernet2/2/0 6.0.2.1/24 +set int state GigabitEthernet2/2/0 up + +comment { tunnel to ludd-cluster-3 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.1/24 +set int state GigabitEthernet2/3/0 up + +lisp gpe tunnel src 6.0.3.1 dst 6.0.3.3 next-ip4 decap-next ip4 iid 1133 +set int ip address lisp_gpe_tunnel0 6.0.4.1/24 +set int state lisp_gpe_tunnel0 up + +lisp gpe tunnel src 6.0.3.3 dst 6.0.3.1 next-ip4 decap-next ip4 iid 3311 +set int stat lisp_gpe_tunnel1 up diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-mpls.conf b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-mpls.conf new file mode 100644 index 00000000000..b6cf8cd62c8 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-mpls.conf @@ -0,0 +1,17 @@ +comment { vpe_phase2 configuration } + +comment { local client facing interface on vlan 100 } +comment { configure lc2 eth1 at e.g. 6.0.2.2/24 } + +set int ip table GigabitEthernet2/2/0 1 +set int ip address GigabitEthernet2/2/0 6.0.2.1/24 +set int state GigabitEthernet2/2/0 up + +comment { tunnel to ludd-cluster-3 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.1/24 +set int state GigabitEthernet2/3/0 up + +mpls encap add label 30 fib 1 dest 6.0.3.3 +mpls decap add label 30 fib 1 + +create mpls gre tunnel src 6.0.3.1 dst 6.0.3.3 intfc 6.0.4.1/24 inner-fib-id 1 outer-fib-id 0 diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-nsh.conf b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-nsh.conf new file mode 100644 index 00000000000..47c1d263ce1 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-nsh.conf @@ -0,0 +1,18 @@ +comment { vpe_phase2 configuration } + +comment { local client facing interface on vlan 100 } +comment { configure lc2 eth1 at e.g. 
6.0.2.2/24 } + +set int ip address GigabitEthernet2/2/0 6.0.2.1/24 +set int state GigabitEthernet2/2/0 up + +comment { tunnel to ludd-cluster-3 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.1/24 +set int state GigabitEthernet2/3/0 up + +nsh gre tunnel src 6.0.3.1 dst 6.0.3.3 c1 1 c2 2 c3 3 c4 4 spi 1133 si 0 next-ip4 +set int ip address nsh_gre_tunnel0 6.0.4.1/24 +set int state nsh_gre_tunnel0 up + +nsh gre tunnel src 6.0.3.3 dst 6.0.3.1 c1 1 c2 2 c3 3 c4 4 spi 3311 si 0 next-ip4 +set int stat nsh_gre_tunnel1 up diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-sr.conf b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-sr.conf new file mode 100644 index 00000000000..7cb612cb1e7 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-sr.conf @@ -0,0 +1,24 @@ +comment { test sr segment chunk-offset on } +test sr hmac validate on + +comment { trunk to ludd-cluster-3 } +set int ip address GigabitEthernet2/3/0 db03::2/64 +enable ip6 interface GigabitEthernet2/3/0 +set int state GigabitEthernet2/3/0 up + +comment { subscriber ludd-cluster-2 } +set int ip address GigabitEthernet2/2/0 db02::2/64 +enable ip6 interface GigabitEthernet2/2/0 +set int state GigabitEthernet2/2/0 up + +sr hmac id 2 key Gozzer +sr hmac id 3 key Hoser + +sr tunnel src db01::1 dst db04::1/128 next db03::1 next db04::1 tag db02::2 clean key Gozzer InPE 1 + +tap connect srlocal hwaddr random +set int ip6 table tap-0 1 +set int ip address tap-0 db04::99/64 +enable ip6 interface tap-0 +set int state tap-0 up +ip route add table 1 db02::0/64 lookup in table 0 diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-vxlan.conf b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-vxlan.conf new file mode 100644 index 00000000000..0688d40b564 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-vxlan.conf @@ -0,0 +1,17 @@ +comment { tunnel to ludd-cluster-3 6.0.3.3 on vlan 101 } + +set int ip address GigabitEthernet2/3/0 6.0.3.1/24 +set int state GigabitEthernet2/3/0 up + +comment { tunnel to ludd-cluster-5 6.0.5.5 on vlan 105 } +set int ip address GigabitEthernet2/7/0 6.0.5.1/24 +set int state GigabitEthernet2/7/0 up + +comment { configure lc2 eth1 at e.g. 6.0.2.2/24 } + +set int ip address GigabitEthernet2/2/0 6.0.2.1/24 +set int state GigabitEthernet2/2/0 up + +vxlan tunnel src 6.0.3.1 peer 6.0.3.3 peer 6.0.3.5 vni 123 +vxlan l2 GigabitEthernet2/2/0 vni 123 + diff --git a/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-vxnsh.conf b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-vxnsh.conf new file mode 100644 index 00000000000..1457ba21247 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-1/ludd-cluster-1-vxnsh.conf @@ -0,0 +1,18 @@ +comment { vpe_phase2 configuration } + +comment { local client facing interface on vlan 100 } +comment { configure lc2 eth1 at e.g. 
6.0.2.2/24 } + +set int ip address GigabitEthernet2/2/0 6.0.2.1/24 +set int state GigabitEthernet2/2/0 up + +comment { tunnel to ludd-cluster-3 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.1/24 +set int state GigabitEthernet2/3/0 up + +nsh vxlan tunnel src 6.0.3.1 dst 6.0.3.3 c1 1 c2 2 c3 3 c4 4 vni 11 spi 1133 si 0 next-ip4 +set int ip address nsh_vxlan_gpe_tunnel0 6.0.4.1/24 +set int state nsh_vxlan_gpe_tunnel0 up + +nsh vxlan tunnel src 6.0.3.3 dst 6.0.3.1 c1 1 c2 2 c3 3 c4 4 vni 33 spi 3311 si 0 next-ip4 +set int stat nsh_vxlan_gpe_tunnel1 up diff --git a/vnet/etc/scripts/ludd-cluster-3/lc3.script b/vnet/etc/scripts/ludd-cluster-3/lc3.script new file mode 100644 index 00000000000..153988cefad --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/lc3.script @@ -0,0 +1,9 @@ +l2tp_set_lookup_key lookup_v6_src + +sw_interface_add_del_address GigabitEthernet2/3/0 db03::1/64 +sw_interface_set_flags GigabitEthernet2/3/0 admin-up + +comment sw_interface_add_del_address GigabitEthernet2/4/0 db02::2/64 +sw_interface_set_flags GigabitEthernet2/4/0 admin-up + +l2tp_session_add_del client_address db03::2 our_address db03::1 GigabitEthernet2/4/0 local_session_id 3 remote_session_id 1 local_cookie 33 remote_cookie 11 diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-ioam.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-ioam.conf new file mode 100644 index 00000000000..88b5ab2c389 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-ioam.conf @@ -0,0 +1,14 @@ +comment { connects to ludd-cluster-4 eth3 addr db04::4/64 } +set int ip address GigabitEthernet2/4/0 db04::1/64 +set int state GigabitEthernet2/4/0 up + +comment { connects to ioam domain ludd-cluster-1 addr db03::1/64 } +set int ip address GigabitEthernet2/3/0 db03::3/64 +set int state GigabitEthernet2/3/0 up + +set ip6 neighbor GigabitEthernet2/3/0 db03::1 00:50:56:b7:05:bf +set ip6 neighbor GigabitEthernet2/4/0 db04::4 00:50:56:b7:05:d2 + +ip route add db02::0/64 via db03::1 + +ioam set destination db04::4/128 pop diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-l3vxlan.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-l3vxlan.conf new file mode 100644 index 00000000000..94357d7de9e --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-l3vxlan.conf @@ -0,0 +1,12 @@ +comment { local client facing interface on vlan 102 } + +comment { configure client lc4 eth1 } +set int ip address GigabitEthernet2/4/0 6.0.4.1/24 +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +vxlan tunnel src 6.0.3.3 peer 6.0.3.1 vni 123 adj 6.0.2.0/24 + diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-lisp.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-lisp.conf new file mode 100644 index 00000000000..faab4bf7758 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-lisp.conf @@ -0,0 +1,16 @@ +comment { local client facing interface on vlan 102 } +comment { configure lc4 eth1 at e.g. 
6.0.4.4/24 } + +set int ip address GigabitEthernet2/4/0 6.0.4.1/24 +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +lisp gpe tunnel src 6.0.3.3 dst 6.0.3.1 next-ip4 decap-next ip4 iid 3311 +set int ip address lisp_gpe_tunnel0 6.0.2.3/24 +set int state lisp_gpe_tunnel0 up + +lisp gpe tunnel src 6.0.3.1 dst 6.0.3.3 next-ip4 decap-next ip4 iid 1133 +set int state lisp_gpe_tunnel1 up diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-mpls-l2.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-mpls-l2.conf new file mode 100644 index 00000000000..2d94b2bf58c --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-mpls-l2.conf @@ -0,0 +1,24 @@ +comment { vpe_phase1d configuration } + +comment { local client facing interface on vlan 102 } +comment { configure lc4 eth1 at e.g. 6.0.4.4/24 } + +set int ip table GigabitEthernet2/4/0 1 +set int ip address GigabitEthernet2/4/0 6.0.4.1/24 +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +mpls encap add label 30 fib 1 dest 6.0.3.1 +mpls decap add label 30 next output GigabitEthernet2/4/0 + +comment { create mpls gre tunnel src 6.0.3.3 dst 6.0.3.1 intfc 6.0.2.1/24 inner-fib-id 1 outer-fib-id 0 l2-only } + +comment {set int l2 xconnect GigabitEthernet2/4/0 mpls-gre0 } + +uncomment { create mpls ethernet tunnel dst 00:50:56:b7:05:bf adj 6.0.3.1/8 l2-only tx-intfc GigabitEthernet2/3/0 fib-id 1 } + +uncomment { set int l2 xconnect GigabitEthernet2/4/0 mpls-eth0 } + diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-mpls.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-mpls.conf new file mode 100644 index 00000000000..ea8b75f765a --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-mpls.conf @@ -0,0 +1,17 @@ +comment { vpe_phase1d configuration } + +comment { local client facing interface on vlan 102 } +comment { configure lc4 eth1 at e.g. 6.0.4.4/24 } + +set int ip table GigabitEthernet2/4/0 1 +set int ip address GigabitEthernet2/4/0 6.0.4.1/24 +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +mpls encap add label 30 fib 1 dest 6.0.3.1 +mpls decap add label 30 fib 1 + +create mpls gre tunnel src 6.0.3.3 dst 6.0.3.1 intfc 6.0.2.1/24 inner-fib-id 1 outer-fib-id 0 diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-nsh.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-nsh.conf new file mode 100644 index 00000000000..7a67b9af064 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-nsh.conf @@ -0,0 +1,19 @@ +comment { vpe_phase1d configuration } + +comment { local client facing interface on vlan 102 } +comment { configure lc4 eth1 at e.g. 
6.0.4.4/24 } + +set int ip address GigabitEthernet2/4/0 6.0.4.1/24 +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +nsh gre tunnel src 6.0.3.3 dst 6.0.3.1 c1 1 c2 2 c3 3 c4 4 spi 3311 si 0 next-ip4 +set int ip address nsh_gre_tunnel0 6.0.2.3/24 +set int state nsh_gre_tunnel0 up + +nsh gre tunnel src 6.0.3.1 dst 6.0.3.3 c1 1 c2 2 c3 3 c4 4 spi 1133 si 0 next-ip4 +set int state nsh_gre_tunnel1 up + diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-sr.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-sr.conf new file mode 100644 index 00000000000..d9cf57625cf --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-sr.conf @@ -0,0 +1,28 @@ +comment { test sr segment chunk-offset on } +test sr hmac validate on + +comment { trunk to ludd-cluster-1 } +set int ip address GigabitEthernet2/3/0 db03::1/64 +enable ip6 interface GigabitEthernet2/3/0 +set int state GigabitEthernet2/3/0 up + +comment { subscriber ludd-cluster-4 } +set int ip address GigabitEthernet2/4/0 db04::2/64 + +comment { next address to fake out ND on shared LAN segment } +comment { set int ip address GigabitEthernet2/4/0 db02::13/64 } + +enable ip6 interface GigabitEthernet2/4/0 +set int state GigabitEthernet2/4/0 up + +sr hmac id 2 key Gozzer +sr hmac id 3 key Hoser + +sr tunnel src db04::1 dst db02::1/128 next db03::2 next db02::1 tag db04::2 clean key Hoser + +tap connect srlocal hwaddr random +set int ip6 table tap-0 1 +set int ip address tap-0 db04::99/64 +enable ip6 interface tap-0 +set int state tap-0 up +ip route add table 1 db02::0/64 lookup in table 0 diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-vxlan.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-vxlan.conf new file mode 100644 index 00000000000..3caa65fe7b1 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-vxlan.conf @@ -0,0 +1,16 @@ + +comment { local client facing interface on vlan 102 } + +comment { configure client lc4 eth1 } +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +comment { tunnel to ludd-cluster-5 on vlan 105 } +set int ip address GigabitEthernet2/7/0 6.0.5.3/24 +set int state GigabitEthernet2/3/0 up + +vxlan tunnel src 6.0.3.3 peer 6.0.3.1 peer 6.0.3.5 vni 123 +vxlan l2 GigabitEthernet2/4/0 vni 123 diff --git a/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-vxnsh.conf b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-vxnsh.conf new file mode 100644 index 00000000000..628b56653a9 --- /dev/null +++ b/vnet/etc/scripts/ludd-cluster-3/ludd-cluster-3-vxnsh.conf @@ -0,0 +1,18 @@ +comment { vpe_phase1d configuration } + +comment { local client facing interface on vlan 102 } +comment { configure lc4 eth1 at e.g. 
6.0.4.4/24 } + +set int ip address GigabitEthernet2/4/0 6.0.4.1/24 +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +nsh vxlan tunnel src 6.0.3.3 dst 6.0.3.1 c1 1 c2 2 c3 3 c4 4 vni 33 spi 3311 si 0 next-ip4 +set int ip address nsh_vxlan_gpe_tunnel0 6.0.2.3/24 +set int state nsh_vxlan_gpe_tunnel0 up + +nsh vxlan tunnel src 6.0.3.1 dst 6.0.3.3 c1 1 c2 2 c3 3 c4 4 vni 11 spi 1133 si 0 next-ip4 +set int state nsh_vxlan_gpe_tunnel1 up diff --git a/vnet/etc/scripts/mpls-o-ethernet/ludd-cluster-1.conf b/vnet/etc/scripts/mpls-o-ethernet/ludd-cluster-1.conf new file mode 100644 index 00000000000..e439ccb0fdf --- /dev/null +++ b/vnet/etc/scripts/mpls-o-ethernet/ludd-cluster-1.conf @@ -0,0 +1,17 @@ +comment { vpe_phase2 configuration } + +comment { local client facing interface on vlan 100 } +comment { configure lc2 eth1 at e.g. 6.0.2.2/24 } + +set int ip table GigabitEthernet2/2/0 1 +set int ip address GigabitEthernet2/2/0 6.0.2.1/24 +set int state GigabitEthernet2/2/0 up + +comment { tunnel to ludd-cluster-3 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.1/24 +set int state GigabitEthernet2/3/0 up + +mpls encap add label 30 fib 1 dest 6.0.4.1 +mpls decap add label 30 fib 1 + +create mpls ethernet tunnel dst 00:50:56:b7:05:cb adj 6.0.4.1/24 tx-intfc GigabitEthernet2/3/0 fib-id 1 diff --git a/vnet/etc/scripts/mpls-o-ethernet/ludd-cluster-3.conf b/vnet/etc/scripts/mpls-o-ethernet/ludd-cluster-3.conf new file mode 100644 index 00000000000..3d71647c1d2 --- /dev/null +++ b/vnet/etc/scripts/mpls-o-ethernet/ludd-cluster-3.conf @@ -0,0 +1,17 @@ +comment { vpe_phase1d configuration } + +comment { local client facing interface on vlan 102 } +comment { configure lc4 eth1 at e.g. 6.0.4.4/24 } + +set int ip table GigabitEthernet2/4/0 1 +set int ip address GigabitEthernet2/4/0 6.0.4.1/24 +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +mpls encap add label 30 fib 1 dest 6.0.2.1 +mpls decap add label 30 fib 1 + +create mpls ethernet tunnel dst 00:50:56:b7:05:bf adj 6.0.2.1/24 tx-intfc GigabitEthernet2/3/0 fib-id 1 diff --git a/vnet/etc/scripts/mpls-o-gre/ludd-cluster-1.conf b/vnet/etc/scripts/mpls-o-gre/ludd-cluster-1.conf new file mode 100644 index 00000000000..992a24c428e --- /dev/null +++ b/vnet/etc/scripts/mpls-o-gre/ludd-cluster-1.conf @@ -0,0 +1,17 @@ +comment { vpe_phase2 configuration } + +comment { local client facing interface on vlan 100 } +comment { configure lc2 eth1 at e.g. 6.0.2.2/24 } + +set int ip table GigabitEthernet2/2/0 1 +set int ip address GigabitEthernet2/2/0 6.0.2.1/24 +set int state GigabitEthernet2/2/0 up + +comment { tunnel to ludd-cluster-3 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.1/24 +set int state GigabitEthernet2/3/0 up + +mpls encap add label 30 fib 1 dest 6.0.3.3 +mpls decap add label 30 fib 1 + +create mpls gre tunnel src 6.0.3.1 dst 6.0.3.3 intfc 6.0.4.1/24 inner-fib-id 1 outer-fib-id 0 diff --git a/vnet/etc/scripts/mpls-o-gre/ludd-cluster-3.conf b/vnet/etc/scripts/mpls-o-gre/ludd-cluster-3.conf new file mode 100644 index 00000000000..ea8b75f765a --- /dev/null +++ b/vnet/etc/scripts/mpls-o-gre/ludd-cluster-3.conf @@ -0,0 +1,17 @@ +comment { vpe_phase1d configuration } + +comment { local client facing interface on vlan 102 } +comment { configure lc4 eth1 at e.g. 
6.0.4.4/24 } + +set int ip table GigabitEthernet2/4/0 1 +set int ip address GigabitEthernet2/4/0 6.0.4.1/24 +set int state GigabitEthernet2/4/0 up + +comment { tunnel to ludd-cluster-1 on vlan 101 } +set int ip address GigabitEthernet2/3/0 6.0.3.3/24 +set int state GigabitEthernet2/3/0 up + +mpls encap add label 30 fib 1 dest 6.0.3.1 +mpls decap add label 30 fib 1 + +create mpls gre tunnel src 6.0.3.3 dst 6.0.3.1 intfc 6.0.2.1/24 inner-fib-id 1 outer-fib-id 0 diff --git a/vnet/etc/scripts/mpls-o-gre/ludd-cluster-5-dhcpd.conf b/vnet/etc/scripts/mpls-o-gre/ludd-cluster-5-dhcpd.conf new file mode 100644 index 00000000000..f0f659cd55f --- /dev/null +++ b/vnet/etc/scripts/mpls-o-gre/ludd-cluster-5-dhcpd.conf @@ -0,0 +1,116 @@ +# +# Sample configuration file for ISC dhcpd for Debian +# +# Attention: If /etc/ltsp/dhcpd.conf exists, that will be used as +# configuration file instead of this file. +# +# + +# The ddns-updates-style parameter controls whether or not the server will +# attempt to do a DNS update when a lease is confirmed. We default to the +# behavior of the version 2 packages ('none', since DHCP v2 didn't +# have support for DDNS.) +ddns-update-style none; + +# option definitions common to all supported networks... +option domain-name "example.org"; +option domain-name-servers ns1.example.org, ns2.example.org; + +default-lease-time 600; +max-lease-time 7200; + +# If this DHCP server is the official DHCP server for the local +# network, the authoritative directive should be uncommented. +#authoritative; + +# Use this to send dhcp log messages to a different log file (you also +# have to hack syslog.conf to complete the redirection). +log-facility local7; + +# No service will be given on this subnet, but declaring it helps the +# DHCP server to understand the network topology. + +#subnet 10.152.187.0 netmask 255.255.255.0 { +#} + +# This is a very basic subnet declaration. + +#subnet 10.254.239.0 netmask 255.255.255.224 { +# range 10.254.239.10 10.254.239.20; +# option routers rtr-239-0-1.example.org, rtr-239-0-2.example.org; +#} + +# This declaration allows BOOTP clients to get dynamic addresses, +# which we don't really recommend. + +#subnet 10.254.239.32 netmask 255.255.255.224 { +# range dynamic-bootp 10.254.239.40 10.254.239.60; +# option broadcast-address 10.254.239.31; +# option routers rtr-239-32-1.example.org; +#} + +# A slightly different configuration for an internal subnet. +#subnet 10.5.5.0 netmask 255.255.255.224 { +# range 10.5.5.26 10.5.5.30; +# option domain-name-servers ns1.internal.example.org; +# option domain-name "internal.example.org"; +# option routers 10.5.5.1; +# option broadcast-address 10.5.5.31; +# default-lease-time 600; +# max-lease-time 7200; +#} + +# Hosts which require special configuration options can be listed in +# host statements. If no address is specified, the address will be +# allocated dynamically (if possible), but the host-specific information +# will still come from the host declaration. + +#host passacaglia { +# hardware ethernet 0:0:c0:5d:bd:95; +# filename "vmunix.passacaglia"; +# server-name "toccata.fugue.com"; +#} + +# Fixed IP addresses can also be specified for hosts. These addresses +# should not also be listed as being available for dynamic assignment. +# Hosts for which fixed IP addresses have been specified can boot using +# BOOTP or DHCP. 
Hosts for which no fixed address is specified can only +# be booted with DHCP, unless there is an address range on the subnet +# to which a BOOTP client is connected which has the dynamic-bootp flag +# set. +#host fantasia { +# hardware ethernet 08:00:07:26:c0:a5; +# fixed-address fantasia.fugue.com; +#} + +# You can declare a class of clients and then do address allocation +# based on that. The example below shows a case where all clients +# in a certain class get addresses on the 10.17.224/24 subnet, and all +# other clients get addresses on the 10.0.29/24 subnet. + +#class "foo" { +# match if substring (option vendor-class-identifier, 0, 4) = "SUNW"; +#} + +#shared-network 224-29 { +# subnet 10.17.224.0 netmask 255.255.255.0 { +# option routers rtr-224.example.org; +# } +# subnet 10.0.29.0 netmask 255.255.255.0 { +# option routers rtr-29.example.org; +# } +# pool { +# allow members of "foo"; +# range 10.17.224.10 10.17.224.250; +# } +# pool { +# deny members of "foo"; +# range 10.0.29.10 10.0.29.230; +# } +#} +subnet 6.0.0.0 netmask 255.255.0.0 { + range 6.0.2.2 6.0.2.5; + option routers 6.0.2.1; + default-lease-time 15; + max-lease-time 15; +} diff --git a/vnet/etc/scripts/pcap b/vnet/etc/scripts/pcap new file mode 100644 index 00000000000..692e5f27012 --- /dev/null +++ b/vnet/etc/scripts/pcap @@ -0,0 +1,18 @@ +packet-generator new { + name x + limit 1 + node ethernet-input + no-recycle + pcap /home/eliot/pcap-data/ISIS_level1_adjacency.cap +} + +packet-generator new { + name y + limit 1 + node hdlc-input + no-recycle + pcap /home/eliot/pcap-data/ISIS_p2p_adjacency.cap +} + +tr add pg-input 10 + diff --git a/vnet/etc/scripts/probe4 b/vnet/etc/scripts/probe4 new file mode 100644 index 00000000000..b530e0dbc1d --- /dev/null +++ b/vnet/etc/scripts/probe4 @@ -0,0 +1,11 @@ +ethernet create-interfaces +set int state fake-eth0 up +set int ip address fake-eth0 1.0.0.1/24 +comment { error } +comment { ip probe fake-eth0 1.0.0.1 } +comment { ip probe fake-eth0 1.2.3.4 } +comment { error } +comment { ip probe fake-eth0 1.0.0.2 } + + + diff --git a/vnet/etc/scripts/probe6 b/vnet/etc/scripts/probe6 new file mode 100644 index 00000000000..a5490c90b98 --- /dev/null +++ b/vnet/etc/scripts/probe6 @@ -0,0 +1,7 @@ +ethernet create-interfaces +set int state fake-eth0 up +set int ip address fake-eth0 2000::1/64 +comment { ip probe fake-eth0 2000::1 } + + + diff --git a/vnet/etc/scripts/rewrite b/vnet/etc/scripts/rewrite new file mode 100644 index 00000000000..d41b9dbf7fc --- /dev/null +++ b/vnet/etc/scripts/rewrite @@ -0,0 +1,62 @@ + + +comment { test ipv4 port/vlan/qinq rewrites by generating arps } + +set int state tuntap-0 down + +set int ip address GigabitEthernet2/2/0 1.2.5.4/24 + +cre sub GigabitEthernet2/2/0 1 dot1q 6 exact-match +set int ip address GigabitEthernet2/2/0.1 1.2.6.4/24 + +cre sub GigabitEthernet2/2/0 2 dot1ad 7 inner-dot1q 8 exact-match +set int ip address GigabitEthernet2/2/0.2 1.2.7.4/24 + +set int state GigabitEthernet2/1/0 up +set int state GigabitEthernet2/2/0 up +set int state GigabitEthernet2/2/0.1 up +set int state GigabitEthernet2/2/0.2 up + +trace add pg-input 2 + +cle er +cle int +cle run + +packet-generator new { + name toport + limit 2 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> 0050.56b7.296d + GRE: 8.0.0.1 -> 1.2.5.6 mpls_unicast + } +} + +packet-generator new { + name tovlan + limit 2 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> 0050.56b7.296d + GRE: 
8.0.0.1 -> 1.2.6.6 mpls_unicast + } +} + +packet-generator new { + name toqinq + limit 2 + no-recycle + node ethernet-input + interface GigabitEthernet2/1/0 + data { + IP4: 0050.56b7.7c83 -> 0050.56b7.296d + GRE: 8.0.0.1 -> 1.2.7.6 mpls_unicast + } +} + + diff --git a/vnet/etc/scripts/rpf b/vnet/etc/scripts/rpf new file mode 100644 index 00000000000..571c6b79a8c --- /dev/null +++ b/vnet/etc/scripts/rpf @@ -0,0 +1,18 @@ +packet-generator new { + name x + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + ICMP: 1.2.3.4 -> 5.6.7.8 + ICMP echo_request + incrementing 100 + } +} + +tr add pg-input 100 +set interface ip source-check pg/stream-0 +ip route 1.2.3.4/32 via pg/stream-0 0x01020304 +ip route 5.6.7.8/32 via drop + diff --git a/vnet/etc/scripts/rtt-test b/vnet/etc/scripts/rtt-test new file mode 100644 index 00000000000..5501b99dc45 --- /dev/null +++ b/vnet/etc/scripts/rtt-test @@ -0,0 +1,31 @@ +set int state TenGigabitEthernet4/0/0 up +set int state TenGigabitEthernet5/0/0 up + +set int ip address TenGigabitEthernet4/0/0 1.0.0.1/24 +set int ip address TenGigabitEthernet5/0/0 2.0.0.1/24 + +ip route table 0 1.0.0.2/32 via TenGigabitEthernet4/0/0 IP4: 00:1b:21:74:5b:04 -> 00:1b:21:79:8e:bc +ip route table 0 2.0.0.2/32 via TenGigabitEthernet5/0/0 IP4: 00:1b:21:79:8e:bc -> 00:1b:21:74:5b:04 +ip route table 1 2.0.0.2/32 via local +ip route table 1 1.0.0.2/32 via local + +set int ip table TenGigabitEthernet5/0/0 1 +set int ip table TenGigabitEthernet4/0/0 1 + +comment { trace add rtt-test-tx 100 } +comment { trace add ixge-input 100 } +comment { te rtt { 1.0.0.2 -> 2.0.0.2 count 1e4 rate 1e9 size 100 histogram-time 1e-5 } } + +packet-generator new { + name x + limit 1 + node ip4-input + size 170-170 + data { + ICMP: 1.0.0.2 -> 2.0.0.2 + ICMP echo_request + incrementing 100 + } +} + + diff --git a/vnet/etc/scripts/speed b/vnet/etc/scripts/speed new file mode 100644 index 00000000000..d60d671f29f --- /dev/null +++ b/vnet/etc/scripts/speed @@ -0,0 +1,14 @@ +packet-generator new { + name x + limit 1 + node ip4-input + size 50-50 + data { + ICMP: 1.2.3.4 -> 5.6.7.8 + ICMP echo_request + incrementing 100 + } +} + +tr add pg-input 100 +ip route 5.6.7.8/32 via pg/stream-0 000102030405060708090a0b0c0d diff --git a/vnet/etc/scripts/sr/ludd-cluster-1.conf b/vnet/etc/scripts/sr/ludd-cluster-1.conf new file mode 100644 index 00000000000..f257b1bc521 --- /dev/null +++ b/vnet/etc/scripts/sr/ludd-cluster-1.conf @@ -0,0 +1,27 @@ +comment { test sr segment chunk-offset on } +test sr hmac validate on + +comment { trunk to ludd-cluster-3 } +set int ip address GigabitEthernet2/3/0 db03::2/64 +enable ip6 interface GigabitEthernet2/3/0 +set int state GigabitEthernet2/3/0 up + +comment { subscriber ludd-cluster-2 } +set int ip address GigabitEthernet2/2/0 db02::2/64 +enable ip6 interface GigabitEthernet2/2/0 +set int state GigabitEthernet2/2/0 up + +sr hmac id 2 key Gozzer +sr hmac id 3 key Hoser + +sr tunnel src db01::1 dst db04::1/128 next db03::1 next db04::1 tag db02::2 clean key Gozzer InPE 1 + +comment { sr unaware service chaining to db03::5 } +comment { sr tunnel src db01::1 dst db04::1/128 next db03::1 next db03::5 next db04::1 tag db02::2 clean key Gozzer InPE 1 } + +comment { tap connect srlocal hwaddr random } +comment { set int ip6 table tap-0 1 } +comment { set int ip address tap-0 db04::99/64 } +comment { enable ip6 interface tap-0 } +comment { set int state tap-0 up } +comment { ip route add table 1 db02::0/64 lookup in table 0 } diff --git a/vnet/etc/scripts/sr/ludd-cluster-2.sh 
b/vnet/etc/scripts/sr/ludd-cluster-2.sh new file mode 100755 index 00000000000..55b835063c7 --- /dev/null +++ b/vnet/etc/scripts/sr/ludd-cluster-2.sh @@ -0,0 +1,3 @@ +#!/bin/bash +ifconfig eth2 inet6 add db02::1/64 +route -A inet6 add db04::1/128 gw db02::2 diff --git a/vnet/etc/scripts/sr/ludd-cluster-3.conf b/vnet/etc/scripts/sr/ludd-cluster-3.conf new file mode 100644 index 00000000000..005bee6e3e4 --- /dev/null +++ b/vnet/etc/scripts/sr/ludd-cluster-3.conf @@ -0,0 +1,22 @@ +comment { trunk to ludd-cluster-1 } +set int ip address GigabitEthernet2/0/0 db03::1/64 +enable ip6 interface GigabitEthernet2/0/0 +set int state GigabitEthernet2/0/0 up + +comment { subscriber ludd-cluster-4 } +set int ip address GigabitEthernet2/2/0 db04::2/64 + +comment { next address to fake out ND on shared LAN segment } +set int ip address GigabitEthernet2/2/0 db02::13/64 + +enable ip6 interface GigabitEthernet2/2/0 +set int state GigabitEthernet2/2/0 up + +sr tunnel src db04::1 dst db02::1/128 next db03::2 next db02::1 tag db04::2 clean + +tap connect srlocal hwaddr random +set int ip6 table tap-0 1 +set int ip address tap-0 db04::99/64 +enable ip6 interface tap-0 +set int state tap-0 up +ip route add table 1 db02::0/64 lookup in table 0 diff --git a/vnet/etc/scripts/sr/ludd-cluster-4.sh b/vnet/etc/scripts/sr/ludd-cluster-4.sh new file mode 100755 index 00000000000..029368dbb8b --- /dev/null +++ b/vnet/etc/scripts/sr/ludd-cluster-4.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +ifconfig eth1 inet6 add db04::1/64 +route -A inet6 add db02::1/128 gw db04::2 diff --git a/vnet/etc/scripts/sr/srlocal.sh b/vnet/etc/scripts/sr/srlocal.sh new file mode 100755 index 00000000000..2f568408b94 --- /dev/null +++ b/vnet/etc/scripts/sr/srlocal.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +ifconfig srlocal inet6 add db04::1/64 +route -6 add db02::0/64 gw db04::99 diff --git a/vnet/etc/scripts/srp b/vnet/etc/scripts/srp new file mode 100644 index 00000000000..7cc37011386 --- /dev/null +++ b/vnet/etc/scripts/srp @@ -0,0 +1,27 @@ +srp create-interfaces +srp create-interfaces +set int hw-class fake-srp0 srp +set int hw-class fake-srp1 srp + +packet-generator new { + name x + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + ICMP: 1.0.0.2 -> 2.0.0.2 + ICMP echo_request + incrementing 100 + } +} + +tr add pg-input 100 +set int state fake-srp0 up +set int state fake-srp1 up + +set int ip address fake-srp0 1.0.0.1/24 +set int ip address fake-srp1 2.0.0.1/24 + + + diff --git a/vnet/etc/scripts/tcp b/vnet/etc/scripts/tcp new file mode 100644 index 00000000000..a2ee8b2dacc --- /dev/null +++ b/vnet/etc/scripts/tcp @@ -0,0 +1,16 @@ +packet-generator new { + name x + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + TCP: 1.2.3.4 -> 5.6.7.8 + TCP: 1234 -> 5678 + incrementing 100 + } +} + +tr add pg-input 100 +ip route 5.6.7.8/32 via local +ip route 1.2.3.4/32 via local diff --git a/vnet/etc/scripts/tcp-test b/vnet/etc/scripts/tcp-test new file mode 100644 index 00000000000..52bfbcdd8c0 --- /dev/null +++ b/vnet/etc/scripts/tcp-test @@ -0,0 +1,6 @@ +int create-ethernet +set int ip address fake-eth0 1.2.3.4/24 +set int state fake-eth0 up +ip route 1.2.3.5/32 via local +trace add tuntap-rx 100 + diff --git a/vnet/etc/scripts/tf-ucs-1 b/vnet/etc/scripts/tf-ucs-1 new file mode 100644 index 00000000000..efa5f2dc27f --- /dev/null +++ b/vnet/etc/scripts/tf-ucs-1 @@ -0,0 +1,16 @@ +comment { connected to Ixia port 1} +set int ip address TenGigabitEthernet4/0/0 1.0.0.2/8 + +set int state TenGigabitEthernet4/0/0 up + +comment { connected to Ixia port 
2}
+set int ip address TenGigabitEthernet4/0/1 2.0.0.2/8
+set int state TenGigabitEthernet4/0/1 up
+
+comment { connected to Ixia port 3}
+set int ip address TenGigabitEthernet6/0/0 3.0.0.2/8
+set int state TenGigabitEthernet6/0/0 up
+
+comment { connected to Ixia port 4}
+set int ip address TenGigabitEthernet6/0/1 4.0.0.2/8
+set int state TenGigabitEthernet6/0/1 up
diff --git a/vnet/etc/scripts/virl/ip6sr.virl b/vnet/etc/scripts/virl/ip6sr.virl new file mode 100644 index 00000000000..5d4d1a0ac67 --- /dev/null +++ b/vnet/etc/scripts/virl/ip6sr.virl @@ -0,0 +1,874 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<topology xmlns="http://www.cisco.com/VIRL" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" schemaVersion="0.9" xsi:schemaLocation="http://www.cisco.com/VIRL https://raw.github.com/CiscoVIRL/schema/v0.9/virl.xsd">
+ <extensions>
+ <entry key="management_network" type="String">flat</entry>
+ <entry key="AutoNetkit.address_family" type="String">dual_stack</entry>
+ <entry key="AutoNetkit.enable_routing" type="Boolean">false</entry>
+ </extensions>
+ <node name="iosv-1" type="SIMPLE" subtype="IOSv" location="195,78" ipv4="192.168.0.2" ipv6="::b:1:0:0:2">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 16:41:18 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-1
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.2 255.255.255.255
+ ipv6 address ::B:1:0:0:2/128
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.193 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-1
+ ip address 10.0.0.5 255.255.255.252
+ duplex auto
+ speed auto
+ media-type rj45
+ ipv6 address ::A:1:1:0:6/126
+!
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+!
+! route to iosv-2
+ipv6 route ::A:1:1:0:16/128 ::A:1:1:0:7
+! route to iosv-4
+ipv6 route ::A:1:1:0:22/128 ::A:1:1:0:7
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.5" ipv6="::a:1:1:0:6" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="iosv-2" type="SIMPLE" subtype="IOSv" location="667,82" ipv4="192.168.0.3" ipv6="::b:1:0:0:3">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 16:41:10 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-2
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.3 255.255.255.255
+ ipv6 address ::B:1:0:0:3/128
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.191 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-2
+ ip address 10.0.0.21 255.255.255.252
+ duplex auto
+ speed auto
+ media-type rj45
+ ipv6 address ::A:1:1:0:16/126
+!
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+!
+ipv6 route ::A:1:1:0:6/128 ::A:1:1:0:17
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.21" ipv6="::a:1:1:0:16" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="iosv-3" type="SIMPLE" subtype="IOSv" location="194,186" ipv4="192.168.0.6" ipv6="::b:1:0:0:4">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 16:27:43 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-3
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.6 255.255.255.255
+ ipv6 address ::B:1:0:0:4/128
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.192 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-1
+ ip address 10.0.0.9 255.255.255.252
+ duplex auto
+ speed auto
+ media-type rj45
+ ipv6 address ::A:1:1:0:A/126
+!
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+!
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.9" ipv6="::a:1:1:0:a" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="iosv-4" type="SIMPLE" subtype="IOSv" location="668,193" ipv4="192.168.0.8" ipv6="::b:1:0:0:5">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 16:27:43 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-4
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.8 255.255.255.255
+ ipv6 address ::B:1:0:0:5/128
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.194 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-2
+ ip address 10.0.0.33 255.255.255.252
+ duplex auto
+ speed auto
+ media-type rj45
+ ipv6 address ::A:1:1:0:22/126
+!
+! Route to iosv-1
+ipv6 route ::A:1:1:0:6/128 ::A:1:1:0:23
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+!
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.33" ipv6="::a:1:1:0:22" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="vpp-1" type="SIMPLE" subtype="vPP" location="302,140">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-1
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" >> /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.6/30
+ set interface ip address GigabitEthernet0/4/0 ::a:1:1:0:7/126
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.10/30
+ set interface ip address GigabitEthernet0/5/0 ::a:1:1:0:b/126
+ set interface state GigabitEthernet0/5/0 up
+ set interface ip address GigabitEthernet0/6/0 10.0.0.13/30
+ set interface ip address GigabitEthernet0/6/0 ::a:1:1:0:e/126
+ set interface state GigabitEthernet0/6/0 up
+ set interface ip address GigabitEthernet0/7/0 10.0.0.17/30
+ set interface ip address GigabitEthernet0/7/0 ::a:1:1:0:12/126
+ set interface state GigabitEthernet0/7/0 up
+ sr tunnel src 0::a:1:1:0:6 dst 0::a:1:1:0:16/128 next 0::a:1:1:0:f next 0::a:1:1:0:1a next 0::a:1:1:0:16 tag 0::a:1:1:0:7 InPE 1 clean
+ sr tunnel src 0::a:1:1:0:6 dst 0::a:1:1:0:22/128 next 0::a:1:1:0:f next 0::a:1:1:0:1a next 0::a:1:1:0:22 tag 0::a:1:1:0:7 InPE 1 clean
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.6" ipv6="::a:1:1:0:7" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.10" ipv6="::a:1:1:0:b" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="2" name="GigabitEthernet0/6/0" ipv4="10.0.0.13" ipv6="::a:1:1:0:e" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="3" name="GigabitEthernet0/7/0" ipv4="10.0.0.17" ipv6="::a:1:1:0:12" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="vpp-2" type="SIMPLE" subtype="vPP" location="541,147">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-2
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" >> /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.25/30
+ set interface ip address GigabitEthernet0/4/0 ::a:1:1:0:1a/126
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.29/30
+ set interface ip address GigabitEthernet0/5/0 ::a:1:1:0:1e/126
+ set interface state GigabitEthernet0/5/0 up
+ set interface ip address GigabitEthernet0/6/0 10.0.0.22/30
+ set interface ip address GigabitEthernet0/6/0 ::a:1:1:0:17/126
+ set interface state GigabitEthernet0/6/0 up
+ set interface ip address GigabitEthernet0/7/0 10.0.0.34/30
+ set interface ip address GigabitEthernet0/7/0 ::a:1:1:0:23/126
+ set interface state GigabitEthernet0/7/0 up
+ sr tunnel src 0::a:1:1:0:16 dst 0::a:1:1:0:6/128 next 0::a:1:1:0:1b next 0::a:1:1:0:e next 0::a:1:1:0:6 tag 0::a:1:1:0:17 InPE 1 clean
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.25" ipv6="::a:1:1:0:1a" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.29" ipv6="::a:1:1:0:1e" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="2" name="GigabitEthernet0/6/0" ipv4="10.0.0.22" ipv6="::a:1:1:0:17" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="3" name="GigabitEthernet0/7/0" ipv4="10.0.0.34" ipv6="::a:1:1:0:23" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="vpp-3" type="SIMPLE" subtype="vPP" location="415,89">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-3
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" >> /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.14/30
+ set interface ip address GigabitEthernet0/4/0 ::a:1:1:0:f/126
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.26/30
+ set interface ip address GigabitEthernet0/5/0 ::a:1:1:0:1b/126
+ set interface state GigabitEthernet0/5/0 up
+ comment { fix src rpf drop screws}
+ ip route add ::a:1:1:0:6/128 via drop
+ ip route add ::a:1:1:0:16/128 via drop
+ ip route add ::a:1:1:0:22/128 via drop
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.14" ipv6="::a:1:1:0:f" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.26" ipv6="::a:1:1:0:1b" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="vpp-4" type="SIMPLE" subtype="vPP" location="419,216">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-4
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" >> /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.18/30
+ set interface ip address GigabitEthernet0/4/0 ::a:1:1:0:13/126
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.30/30
+ set interface ip address GigabitEthernet0/5/0 ::a:1:1:0:1f/126
+ set interface state GigabitEthernet0/5/0 up
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.18" ipv6="::a:1:1:0:13" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.30" ipv6="::a:1:1:0:1f" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <connection dst="/virl:topology/virl:node[5]/virl:interface[1]" src="/virl:topology/virl:node[1]/virl:interface[1]"/>
+ <connection dst="/virl:topology/virl:node[5]/virl:interface[2]" src="/virl:topology/virl:node[3]/virl:interface[1]"/>
+ <connection dst="/virl:topology/virl:node[7]/virl:interface[1]" src="/virl:topology/virl:node[5]/virl:interface[3]"/>
+ <connection dst="/virl:topology/virl:node[8]/virl:interface[1]" src="/virl:topology/virl:node[5]/virl:interface[4]"/>
+ <connection dst="/virl:topology/virl:node[6]/virl:interface[1]" src="/virl:topology/virl:node[7]/virl:interface[2]"/>
+ <connection dst="/virl:topology/virl:node[6]/virl:interface[2]" src="/virl:topology/virl:node[8]/virl:interface[2]"/>
+ <connection dst="/virl:topology/virl:node[2]/virl:interface[1]" src="/virl:topology/virl:node[6]/virl:interface[3]"/>
+ <connection dst="/virl:topology/virl:node[4]/virl:interface[1]" src="/virl:topology/virl:node[6]/virl:interface[4]"/>
+</topology>
diff --git a/vnet/etc/scripts/virl/ip6sr_notes.txt b/vnet/etc/scripts/virl/ip6sr_notes.txt new file mode 100644 index 00000000000..5febf2c690b --- /dev/null +++ b/vnet/etc/scripts/virl/ip6sr_notes.txt @@ -0,0 +1,38 @@ +vpp-1, tunnel 1: iosv-1 vpp-1 vpp-3 vpp-2 iosv-2
+
+iosv-1
+::a:1:1:0:6
+
+to iosv-2
+ipv6 route ::a:1:1:0:16/128 ::a:1:1:0:7
+to iosv-4
+ipv6 route ::a:1:1:0:22/128 ::a:1:1:0:7
+
+vpp-1
+
+::a:1:1:0:7
+...
+::a:1:1:0:e
+
+sr tunnel src 0::a:1:1:0:6 dst 0::a:1:1:0:16/128 next 0::a:1:1:0:f next 0::a:1:1:0:1a next 0::a:1:1:0:16 tag 0::a:1:1:0:7 InPE 1 clean
+
+vpp-3
+::a:1:1:0:f
+..
+::a:1:1:0:1b
+
+comment {fix src rpf screws}
+ip route add ::a:1:1:0:6/128 via drop
+ip route add ::a:1:1:0:16/128 via drop
+ip route add ::a:1:1:0:22/128 via drop
+vpp-2
+::a:1:1:0:1a
+..
+::a:1:1:0:17
+
+sr tunnel src 0::a:1:1:0:16 dst 0::a:1:1:0:6/128 next 0::a:1:1:0:1b next 0::a:1:1:0:e next 0::a:1:1:0:6 tag 0::a:1:1:0:17 InPE 1 clean
+
+iosv-2
+::a:1:1:0:16
+
+ipv6 route ::a:1:1:0:6/128 ::a:1:1:0:17
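Note (added for illustration, not part of the original lab notes): the two sr tunnel commands above encode the paths sketched at the top of this file (iosv-1, vpp-1, vpp-3, vpp-2, iosv-2 and the reverse). The minimal C sketch below only prints the segment lists implied by the "next" arguments, using the addresses listed in these notes; the node names in parentheses are my reading of the per-node address sections and are assumptions, not output of any vpp tool.

#include <stdio.h>

/* Illustrative only: segment lists copied from the "sr tunnel ... next ..."
 * commands in these notes. Printing them side by side makes the forward
 * and return traversal order easy to compare. */
int main (void)
{
  const char *fwd[] = { "::a:1:1:0:f (vpp-3)", "::a:1:1:0:1a (vpp-2)",
                        "::a:1:1:0:16 (iosv-2)" };
  const char *rev[] = { "::a:1:1:0:1b (vpp-3)", "::a:1:1:0:e (vpp-1)",
                        "::a:1:1:0:6 (iosv-1)" };
  int i;

  printf ("vpp-1 tunnel (src iosv-1, dst iosv-2), segments in order:\n");
  for (i = 0; i < 3; i++)
    printf ("  next %s\n", fwd[i]);

  printf ("vpp-2 return tunnel (src iosv-2, dst iosv-1), segments in order:\n");
  for (i = 0; i < 3; i++)
    printf ("  next %s\n", rev[i]);

  return 0;
}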
diff --git a/vnet/etc/scripts/virl/mplsogre.virl b/vnet/etc/scripts/virl/mplsogre.virl new file mode 100644 index 00000000000..33dd03299d4 --- /dev/null +++ b/vnet/etc/scripts/virl/mplsogre.virl @@ -0,0 +1,319 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<topology xmlns="http://www.cisco.com/VIRL" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" schemaVersion="0.9" xsi:schemaLocation="http://www.cisco.com/VIRL https://raw.github.com/CiscoVIRL/schema/v0.9/virl.xsd"> + <extensions> + <entry key="management_network" type="String">flat</entry> + </extensions> + <node name="vpp-1" type="SIMPLE" subtype="vPP" location="467,307" vmImage="vPP-hacker [445e503a-564e-4c14-93e1-9660957c539a]" vmFlavor="vPP-small [e7f7de38-54e3-46f1-92ef-92b0039c1c60]"> + <extensions> + <entry key="config" type="string">#cloud-config +bootcmd: +- ln -s -t /etc/rc.d /etc/rc.local +hostname: vpp-1 +manage_etc_hosts: true +runcmd: +- start ttyS0 +- systemctl start getty@ttyS0.service +- systemctl start rc-local +- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config +- echo "UseDNS no" >> /etc/ssh/sshd_config +- service ssh restart +- service sshd restart +- sed -i 's/no-pci//' /cisco/etc/qn.conf +- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf +users: +- default +- gecos: User configured by VIRL Configuration Engine 0.15.3 + lock-passwd: false + name: cisco + plain-text-passwd: cisco + shell: /bin/bash + ssh-authorized-keys: + - VIRL-USER-SSH-PUBLIC-KEY + sudo: ALL=(ALL) ALL +write_files: +- path: /etc/init/ttyS0.conf + owner: root:root + content: | + # ttyS0 - getty + # This service maintains a getty on ttyS0 from the point the system is + # started until it is shut down again. + start on stopped rc or RUNLEVEL=[12345] + stop on runlevel [!12345] + respawn + exec /sbin/getty -L 115200 ttyS0 vt102 + permissions: '0644' +- path: /etc/systemd/system/dhclient@.service + content: | + [Unit] + Description=Run dhclient on %i interface + After=network.target + [Service] + Type=oneshot + ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease + RemainAfterExit=yes + owner: root:root + permissions: '0644' +- path: /cisco/etc/vpp-server.conf + owner: root:root + permissions: '0644' + content: |- + set interface ip address GigabitEthernet0/4/0 10.0.0.6/30 + set interface state GigabitEthernet0/4/0 up + set interface ip address GigabitEthernet0/5/0 10.0.0.9/30 + set interface state GigabitEthernet0/5/0 up + mpls encap add label 30 fib 0 dest 10.0.0.10 + mpls decap add label 30 fib 0 + create mpls gre tunnel src 10.0.0.9 dst 10.0.0.10 intfc 10.0.0.13/30 inner-fib-id 0 outer-fib-id 0 +</entry> + </extensions> + <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.6" netPrefixLenV4="30"/> + <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.9" netPrefixLenV4="30"/> + </node> + <node name="vpp-2" type="SIMPLE" subtype="vPP" location="591,305" vmImage="vPP-hacker [445e503a-564e-4c14-93e1-9660957c539a]" vmFlavor="vPP-small [e7f7de38-54e3-46f1-92ef-92b0039c1c60]"> + <extensions> + <entry key="config" type="string">#cloud-config +bootcmd: +- ln -s -t /etc/rc.d /etc/rc.local +hostname: vpp-2 +manage_etc_hosts: true +runcmd: +- start ttyS0 +- systemctl start getty@ttyS0.service +- systemctl start rc-local +- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config +- echo "UseDNS no" >> /etc/ssh/sshd_config +- service ssh restart +- service sshd restart +- sed -i 's/no-pci//' /cisco/etc/qn.conf +- sed -i 's/1024/1024 
decimal-interface-names/g' /cisco/etc/qn.conf +users: +- default +- gecos: User configured by VIRL Configuration Engine 0.15.3 + lock-passwd: false + name: cisco + plain-text-passwd: cisco + shell: /bin/bash + ssh-authorized-keys: + - VIRL-USER-SSH-PUBLIC-KEY + sudo: ALL=(ALL) ALL +write_files: +- path: /etc/init/ttyS0.conf + owner: root:root + content: | + # ttyS0 - getty + # This service maintains a getty on ttyS0 from the point the system is + # started until it is shut down again. + start on stopped rc or RUNLEVEL=[12345] + stop on runlevel [!12345] + respawn + exec /sbin/getty -L 115200 ttyS0 vt102 + permissions: '0644' +- path: /etc/systemd/system/dhclient@.service + content: | + [Unit] + Description=Run dhclient on %i interface + After=network.target + [Service] + Type=oneshot + ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease + RemainAfterExit=yes + owner: root:root + permissions: '0644' +- path: /cisco/etc/vpp-server.conf + owner: root:root + permissions: '0644' + content: |- + set interface ip address GigabitEthernet0/4/0 10.0.0.10/30 + set interface state GigabitEthernet0/4/0 up + set interface ip address GigabitEthernet0/5/0 10.0.0.14/30 + set interface state GigabitEthernet0/5/0 up + mpls encap add label 30 fib 0 dest 10.0.0.9 + mpls decap add label 30 fib 0 + create mpls gre tunnel src 10.0.0.10 dst 10.0.0.9 intfc 10.0.0.5/30 inner-fib-id 0 outer-fib-id 0 +</entry> + </extensions> + <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.10" netPrefixLenV4="30"/> + <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.14" netPrefixLenV4="30"/> + </node> + <node name="iosv-1" type="SIMPLE" subtype="IOSv" location="326,311" ipv4="192.168.0.1"> + <extensions> + <entry key="config" type="string">! IOS Config generated on 2015-03-03 17:26 +! by autonetkit_0.15.0 +! +hostname iosv-1 +boot-start-marker +boot-end-marker +! +no aaa new-model +! +! +ip cef +ipv6 unicast-routing +ipv6 cef +! +! +service timestamps debug datetime msec +service timestamps log datetime msec +no service password-encryption +no service config +enable password cisco +ip classless +ip subnet-zero +no ip domain lookup +line vty 0 4 + transport input ssh telnet + exec-timeout 720 0 + password cisco + login +line con 0 + password cisco +! +! +interface Loopback0 + description Loopback + ip address 192.168.0.1 255.255.255.255 +! +interface GigabitEthernet0/0 + description OOB Management + ! Configured on launch + no ip address + duplex auto + speed auto + no shutdown +! +interface GigabitEthernet0/1 + description to vpp-1 + ip address 10.0.0.5 255.255.255.252 + ip ospf cost 1 + duplex auto + speed auto + no shutdown +! +! +! +router ospf 1 + network 192.168.0.1 0.0.0.0 area 0 + log-adjacency-changes + passive-interface Loopback0 + network 10.0.0.4 0.0.0.3 area 0 +! +! +router bgp 1 + bgp router-id 192.168.0.1 + no synchronization +! ibgp + ! ibgp peers + ! + neighbor 192.168.0.4 remote-as 1 + neighbor 192.168.0.4 description iBGP peer iosv-2 + neighbor 192.168.0.4 update-source Loopback0 +! +! + ! + address-family ipv4 + network 192.168.0.1 mask 255.255.255.255 + neighbor 192.168.0.4 activate + exit-address-family +! +! +! 
+ip route 10.0.0.13 255.255.255.255 10.0.0.6 +end +</entry> + <entry key="AutoNetkit.mgmt_ip" type="string"></entry> + </extensions> + <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.5" netPrefixLenV4="30"/> + </node> + <node name="iosv-2" type="SIMPLE" subtype="IOSv" location="741,307" ipv4="192.168.0.4"> + <extensions> + <entry key="config" type="string">! IOS Config generated on 2015-03-03 17:26 +! by autonetkit_0.15.0 +! +hostname iosv-2 +boot-start-marker +boot-end-marker +! +no aaa new-model +! +! +ip cef +ipv6 unicast-routing +ipv6 cef +! +! +service timestamps debug datetime msec +service timestamps log datetime msec +no service password-encryption +no service config +enable password cisco +ip classless +ip subnet-zero +no ip domain lookup +line vty 0 4 + transport input ssh telnet + exec-timeout 720 0 + password cisco + login +line con 0 + password cisco +! +! +interface Loopback0 + description Loopback + ip address 192.168.0.4 255.255.255.255 +! +interface GigabitEthernet0/0 + description OOB Management + ! Configured on launch + no ip address + duplex auto + speed auto + no shutdown +! +interface GigabitEthernet0/1 + description to vpp-2 + ip address 10.0.0.13 255.255.255.252 + ip ospf cost 1 + duplex auto + speed auto + no shutdown +! +! +! +router ospf 1 + network 192.168.0.4 0.0.0.0 area 0 + log-adjacency-changes + passive-interface Loopback0 + network 10.0.0.12 0.0.0.3 area 0 +! +! +router bgp 1 + bgp router-id 192.168.0.4 + no synchronization +! ibgp + ! ibgp peers + ! + neighbor 192.168.0.1 remote-as 1 + neighbor 192.168.0.1 description iBGP peer iosv-1 + neighbor 192.168.0.1 update-source Loopback0 +! +! + ! + address-family ipv4 + network 192.168.0.4 mask 255.255.255.255 + neighbor 192.168.0.1 activate + exit-address-family +! +! +ip route 10.0.0.5 255.255.255.255 10.0.0.14 +! +end +</entry> + <entry key="AutoNetkit.mgmt_ip" type="string"></entry> + </extensions> + <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.13" netPrefixLenV4="30"/> + </node> + <connection dst="/virl:topology/virl:node[1]/virl:interface[1]" src="/virl:topology/virl:node[3]/virl:interface[1]"/> + <connection dst="/virl:topology/virl:node[2]/virl:interface[1]" src="/virl:topology/virl:node[1]/virl:interface[2]"/> + <connection dst="/virl:topology/virl:node[4]/virl:interface[1]" src="/virl:topology/virl:node[2]/virl:interface[2]"/> +</topology> diff --git a/vnet/etc/scripts/virl/simple.virl b/vnet/etc/scripts/virl/simple.virl new file mode 100644 index 00000000000..6033c42c36a --- /dev/null +++ b/vnet/etc/scripts/virl/simple.virl @@ -0,0 +1,389 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<topology xmlns="http://www.cisco.com/VIRL" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" schemaVersion="0.9" xsi:schemaLocation="http://www.cisco.com/VIRL https://raw.github.com/CiscoVIRL/schema/v0.9/virl.xsd"> + <extensions> + <entry key="management_network" type="String">flat</entry> + </extensions> + <node name="iosv-1" type="SIMPLE" subtype="IOSv" location="427,219" ipv4="192.168.0.2"> + <extensions> + <entry key="config" type="string">! +! Last configuration change at 14:27:32 UTC Fri Mar 27 2015 +! +version 15.4 +service timestamps debug datetime msec +service timestamps log datetime msec +no service password-encryption +! +hostname iosv-1 +! +boot-start-marker +boot-end-marker +! +! +enable password cisco +! +no aaa new-model +! +! +! +mmi polling-interval 60 +no mmi auto-configure +no mmi pvc +mmi snmp-timeout 180 +! +! +! +! +! +! +! +! +! +! +! +! +! 
+no ip domain lookup +ip cef +ipv6 unicast-routing +ipv6 cef +! +multilink bundle-name authenticated +! +! +cts logging verbose +! +! +! +redundancy +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +interface Loopback0 + description Loopback + ip address 192.168.0.2 255.255.255.255 +! +interface GigabitEthernet0/0 + description OOB Management + ip address 172.16.1.167 255.255.255.0 + duplex auto + speed auto + media-type rj45 +! +interface GigabitEthernet0/1 + description to vpp-1 + ip address 10.0.0.5 255.255.255.252 + ip ospf cost 1 + duplex auto + speed auto + media-type rj45 +! +router ospf 1 + passive-interface Loopback0 + network 10.0.0.4 0.0.0.3 area 0 + network 192.168.0.2 0.0.0.0 area 0 +! +router bgp 1 + bgp router-id 192.168.0.2 + bgp log-neighbor-changes + neighbor 192.168.0.3 remote-as 1 + neighbor 192.168.0.3 description iBGP peer iosv-2 + neighbor 192.168.0.3 update-source Loopback0 + ! + address-family ipv4 + network 192.168.0.2 mask 255.255.255.255 + neighbor 192.168.0.3 activate + exit-address-family +! +ip forward-protocol nd +! +! +no ip http server +no ip http secure-server +ip route 10.0.0.9 255.255.255.255 10.0.0.6 +! +! +! +! +control-plane +! +banner exec ` +************************************************************************** +* IOSv is strictly limited to use for evaluation, demonstration and IOS * +* education. IOSv is provided as-is and is not supported by Cisco's * +* Technical Advisory Center. Any use or disclosure, in whole or in part, * +* of the IOSv Software or Documentation to any third party for any * +* purposes is expressly prohibited except as otherwise authorized by * +* Cisco in writing. * +**************************************************************************` +banner incoming ` +************************************************************************** +* IOSv is strictly limited to use for evaluation, demonstration and IOS * +* education. IOSv is provided as-is and is not supported by Cisco's * +* Technical Advisory Center. Any use or disclosure, in whole or in part, * +* of the IOSv Software or Documentation to any third party for any * +* purposes is expressly prohibited except as otherwise authorized by * +* Cisco in writing. * +**************************************************************************` +banner login ` +************************************************************************** +* IOSv is strictly limited to use for evaluation, demonstration and IOS * +* education. IOSv is provided as-is and is not supported by Cisco's * +* Technical Advisory Center. Any use or disclosure, in whole or in part, * +* of the IOSv Software or Documentation to any third party for any * +* purposes is expressly prohibited except as otherwise authorized by * +* Cisco in writing. * +**************************************************************************` +! +line con 0 + password cisco +line aux 0 +line vty 0 4 + exec-timeout 720 0 + password cisco + login + transport input telnet ssh +! +no scheduler allocate +! +end</entry> + <entry key="AutoNetkit.mgmt_ip" type="string"></entry> + </extensions> + <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.5" netPrefixLenV4="30"/> + </node> + <node name="iosv-2" type="SIMPLE" subtype="IOSv" location="704,221" ipv4="192.168.0.3"> + <extensions> + <entry key="config" type="string">! +! Last configuration change at 14:26:58 UTC Fri Mar 27 2015 +! +version 15.4 +service timestamps debug datetime msec +service timestamps log datetime msec +no service password-encryption +! +hostname iosv-2 +! 
+boot-start-marker +boot-end-marker +! +! +enable password cisco +! +no aaa new-model +! +! +! +mmi polling-interval 60 +no mmi auto-configure +no mmi pvc +mmi snmp-timeout 180 +! +! +! +! +! +! +! +! +! +! +! +! +! +no ip domain lookup +ip cef +ipv6 unicast-routing +ipv6 cef +! +multilink bundle-name authenticated +! +! +cts logging verbose +! +! +! +redundancy +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +interface Loopback0 + description Loopback + ip address 192.168.0.3 255.255.255.255 +! +interface GigabitEthernet0/0 + description OOB Management + ip address 172.16.1.164 255.255.255.0 + duplex auto + speed auto + media-type rj45 +! +interface GigabitEthernet0/1 + description to vpp-1 + ip address 10.0.0.9 255.255.255.252 + ip ospf cost 1 + duplex auto + speed auto + media-type rj45 +! +router ospf 1 + passive-interface Loopback0 + network 10.0.0.8 0.0.0.3 area 0 + network 192.168.0.3 0.0.0.0 area 0 +! +router bgp 1 + bgp router-id 192.168.0.3 + bgp log-neighbor-changes + neighbor 192.168.0.2 remote-as 1 + neighbor 192.168.0.2 description iBGP peer iosv-1 + neighbor 192.168.0.2 update-source Loopback0 + ! + address-family ipv4 + network 192.168.0.3 mask 255.255.255.255 + neighbor 192.168.0.2 activate + exit-address-family +! +ip forward-protocol nd +! +! +no ip http server +no ip http secure-server +ip route 10.0.0.5 255.255.255.255 10.0.0.10 +! +! +! +! +control-plane +! +banner exec ` +************************************************************************** +* IOSv is strictly limited to use for evaluation, demonstration and IOS * +* education. IOSv is provided as-is and is not supported by Cisco's * +* Technical Advisory Center. Any use or disclosure, in whole or in part, * +* of the IOSv Software or Documentation to any third party for any * +* purposes is expressly prohibited except as otherwise authorized by * +* Cisco in writing. * +**************************************************************************` +banner incoming ` +************************************************************************** +* IOSv is strictly limited to use for evaluation, demonstration and IOS * +* education. IOSv is provided as-is and is not supported by Cisco's * +* Technical Advisory Center. Any use or disclosure, in whole or in part, * +* of the IOSv Software or Documentation to any third party for any * +* purposes is expressly prohibited except as otherwise authorized by * +* Cisco in writing. * +**************************************************************************` +banner login ` +************************************************************************** +* IOSv is strictly limited to use for evaluation, demonstration and IOS * +* education. IOSv is provided as-is and is not supported by Cisco's * +* Technical Advisory Center. Any use or disclosure, in whole or in part, * +* of the IOSv Software or Documentation to any third party for any * +* purposes is expressly prohibited except as otherwise authorized by * +* Cisco in writing. * +**************************************************************************` +! +line con 0 + password cisco +line aux 0 +line vty 0 4 + exec-timeout 720 0 + password cisco + login + transport input telnet ssh +! +no scheduler allocate +! 
+end</entry> + <entry key="AutoNetkit.mgmt_ip" type="string"></entry> + </extensions> + <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.9" netPrefixLenV4="30"/> + </node> + <node name="vpp-1" type="SIMPLE" subtype="vPP" location="561,217"> + <extensions> + <entry key="config" type="string">#cloud-config +bootcmd: +- ln -s -t /etc/rc.d /etc/rc.local +hostname: vpp-1 +manage_etc_hosts: true +runcmd: +- start ttyS0 +- systemctl start getty@ttyS0.service +- systemctl start rc-local +- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config +- echo "UseDNS no" >> /etc/ssh/sshd_config +- service ssh restart +- service sshd restart +- sed -i 's/no-pci//' /cisco/etc/qn.conf +- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf +users: +- default +- gecos: User configured by VIRL Configuration Engine 0.15.3 + lock-passwd: false + name: cisco + plain-text-passwd: cisco + shell: /bin/bash + ssh-authorized-keys: + - VIRL-USER-SSH-PUBLIC-KEY + sudo: ALL=(ALL) ALL +write_files: +- path: /etc/init/ttyS0.conf + owner: root:root + content: | + # ttyS0 - getty + # This service maintains a getty on ttyS0 from the point the system is + # started until it is shut down again. + start on stopped rc or RUNLEVEL=[12345] + stop on runlevel [!12345] + respawn + exec /sbin/getty -L 115200 ttyS0 vt102 + permissions: '0644' +- path: /etc/systemd/system/dhclient@.service + content: | + [Unit] + Description=Run dhclient on %i interface + After=network.target + [Service] + Type=oneshot + ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease + RemainAfterExit=yes + owner: root:root + permissions: '0644' +- path: /cisco/etc/vpp-server.conf + owner: root:root + permissions: '0644' + content: |- + set interface ip address GigabitEthernet0/4/0 10.0.0.6/30 + set interface state GigabitEthernet0/4/0 up + set interface ip address GigabitEthernet0/5/0 10.0.0.10/30 + set interface state GigabitEthernet0/5/0 up +</entry> + </extensions> + <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.6" netPrefixLenV4="30"/> + <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.10" netPrefixLenV4="30"/> + </node> + <connection dst="/virl:topology/virl:node[3]/virl:interface[1]" src="/virl:topology/virl:node[1]/virl:interface[1]"/> + <connection dst="/virl:topology/virl:node[2]/virl:interface[1]" src="/virl:topology/virl:node[3]/virl:interface[2]"/> +</topology> diff --git a/vnet/etc/scripts/vlan b/vnet/etc/scripts/vlan new file mode 100644 index 00000000000..076080a6978 --- /dev/null +++ b/vnet/etc/scripts/vlan @@ -0,0 +1,23 @@ +int create-ethernet +int create-sub fake-eth0 1 +set int state fake-eth0 up +set int state fake-eth0.1 up + +packet-generator new { + name x + limit 1 + node ethernet-input + interface fake-eth0 + size 64-64 + no-recycle + data { + IP4: 1.2.3 -> 4.5.6 vlan 1 + ICMP: 1.2.3.4 -> 5.6.7.8 + ICMP echo_request + incrementing 100 + } +} + +tr add pg-input 100 +ip route 5.6.7.8/32 via local +ip route 1.2.3.4/32 via local diff --git a/vnet/example/main_stub.c b/vnet/example/main_stub.c new file mode 100644 index 00000000000..c45932baa39 --- /dev/null +++ b/vnet/example/main_stub.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/tcp.h> + +#include <vlib/unix/cj.h> + +DECLARE_CJ_GLOBAL_LOG + +static clib_error_t * +vnet_example_init (vlib_main_t * vm) +{ + clib_error_t * error = 0; + + /* Due to crude comment-out of eliot's smp stuff */ + vm->heap_size = 256<<20; + + if ((error = vlib_call_init_function (vm, pg_init))) + return error; + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ethernet_init))) + return error; + if ((error = vlib_call_init_function (vm, ethernet_arp_init))) + return error; + if ((error = vlib_call_init_function (vm, osi_init))) + return error; + if ((error = vlib_call_init_function (vm, srp_init))) + return error; +#if DPDK == 0 + if ((error = vlib_call_init_function (vm, ixge_init))) + return error; + if ((error = vlib_call_init_function (vm, ixgev_init))) + return error; + if ((error = vlib_call_init_function (vm, ige_init))) + return error; +#else + if ((error = vlib_call_init_function (vm, dpdk_init))) + return error; +#endif + + if ((error = vlib_call_init_function (vm, dhcp_proxy_init))) + return error; + if ((error = vlib_call_init_function (vm, mpls_init))) + return error; + if ((error = vlib_call_init_function (vm, mpls_interface_init))) + return error; + + if ((error = vlib_call_init_function (vm, l2_init))) + return error; + if ((error = vlib_call_init_function (vm, l2tp_init))) + return error; + + if ((error = unix_physmem_init (vm, /* physical_memory_required */ 0))) + return error; + + if ((error = unix_physmem_init (vm, /* physical_memory_required */ 0))) + return error; + + if ((error = vlib_call_init_function (vm, tuntap_init))) + return error; + + vlib_unix_cli_set_prompt ("VNET: "); + + return error; +} + +VLIB_INIT_FUNCTION (vnet_example_init); + +int main (int argc, char * argv[]) +{ + clib_mem_init (0, (2ULL << 30)); + return vlib_unix_main (argc, argv); +} + +#if 0 +#define foreach_tcp_test_error \ + _ (SEGMENTS_RECEIVED, "segments received") + +typedef enum { +#define _(sym,str) TCP_TEST_ERROR_##sym, + foreach_tcp_test_error +#undef _ + TCP_TEST_N_ERROR, +} tcp_test_error_t; + +static char * tcp_test_error_strings[] = { +#define _(sym,string) string, + foreach_tcp_test_error +#undef _ +}; + +static uword +tcp_test (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next; + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + u32 bi0; + u8 error0, next0; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + + clib_warning ("got '%U'", format_vlib_buffer_contents, vm, p0); + + error0 = next0 = 0; 
+ p0->error = node->errors[error0]; + + if (PREDICT_FALSE (next0 != next)) + { + to_next -= 1; + n_left_to_next += 1; + + vlib_put_next_frame (vm, node, next, n_left_to_next); + + next = next0; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (tcp_test_node) = { + .function = tcp_test, + .name = "tcp-test", + + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, + + .n_errors = TCP_TEST_N_ERROR, + .error_strings = tcp_test_error_strings, +}; + +static clib_error_t * +tcp_test_init (vlib_main_t * vm) +{ + clib_error_t * error = 0; + + { + tcp_listener_registration_t r = { + .port = 1234, + .flags = TCP_LISTENER_IP4, + .data_node_index = tcp_test_node.index, + .event_function = 0, + }; + + tcp_register_listener (vm, &r); + } + + return error; +} + +VLIB_INIT_FUNCTION (tcp_test_init); + +#endif diff --git a/vnet/example/rtt_test.c b/vnet/example/rtt_test.c new file mode 100644 index 00000000000..98f0cdf2f6b --- /dev/null +++ b/vnet/example/rtt_test.c @@ -0,0 +1,827 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/ip/ip.h> +#include <math.h> + +/* 20 byte TCP + 12 bytes of options (timestamps) = 32 bytes */ +typedef struct { + u64 sequence_number; + f64 time_stamp; + u32 stream_index; + u32 unused[3]; +} __attribute__ ((packed)) rtt_test_header_t; + +typedef struct { + ip4_header_t ip4; + rtt_test_header_t rtt; + u8 payload[0]; +} __attribute__ ((packed)) rtt_test_packet_t; + +typedef struct { + ip4_address_t src_address, dst_address; + + f64 n_packets_to_send; + + f64 send_rate_bits_per_second; + f64 send_rate_packets_per_second; + + f64 packet_accumulator; + + u64 n_packets_sent; + + /* [0] from past, [1] in sequence, [2] from future. */ + u64 n_packets_received[3]; + + f64 tx_time_stream_created; + f64 tx_time_last_sent; + + f64 rx_ack_times[2]; + + u64 rx_expected_sequence_number; + + u32 n_bytes_payload; + + /* Including IP & L2 header. */ + u32 n_bytes_per_packet_on_wire; + + f64 ave_rtt, rms_rtt, rtt_count; + + u32 max_n_rx_ack_dts; + f64 * rx_ack_dts; + + u32 * rtt_histogram; + + vlib_packet_template_t packet_template; +} rtt_test_stream_t; + +typedef struct { + /* Size of encapsulation (e.g. 14 for ethernet). */ + u32 n_encap_bytes; + + u32 is_sender; + + u32 verbose; + + f64 rms_histogram_units; + + rtt_test_stream_t stream_history[32]; + u32 stream_history_index; + + rtt_test_stream_t * stream_pool; + + vlib_packet_template_t ack_packet_template; + u16 ack_packet_template_ip4_checksum; +} rtt_test_main_t; + +/* Use 2 IP protocols 253/254 which are assigned for experimental testing. 
*/ +typedef enum { + RTT_TEST_IP_PROTOCOL_DATA = 253, + RTT_TEST_IP_PROTOCOL_ACK = 254, +} rtt_test_ip_protcol_t; + +always_inline void +rtt_test_stream_free (vlib_main_t * vm, rtt_test_main_t * tm, rtt_test_stream_t * s) +{ + vlib_packet_template_free (vm, &s->packet_template); + memset (&s->packet_template, 0, sizeof (s->packet_template)); + + tm->stream_history[tm->stream_history_index++] = s[0]; + if (tm->stream_history_index >= ARRAY_LEN (tm->stream_history)) + tm->stream_history_index = 0; + + s->rtt_histogram = 0; + pool_put (tm->stream_pool, s); +} + +rtt_test_main_t rtt_test_main; + +#define foreach_rtt_test_error \ + _ (packets_received, "packets received") \ + _ (listener_acks_dropped, "listener acks dropped") \ + _ (unknown_stream, "unknown stream") + +typedef enum { +#define _(sym,str) RTT_TEST_ERROR_##sym, + foreach_rtt_test_error +#undef _ + RTT_TEST_N_ERROR, +} rtt_test_error_t; + +static char * rtt_test_error_strings[] = { +#define _(sym,string) string, + foreach_rtt_test_error +#undef _ +}; + +typedef enum { + RTT_TEST_RX_NEXT_DROP, + RTT_TEST_RX_NEXT_ECHO, + RTT_TEST_RX_N_NEXT, +} rtt_test_rx_next_t; + +static uword +rtt_test_rx_data (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + rtt_test_main_t * tm = &rtt_test_main; + uword n_packets = frame->n_vectors; + u32 * from, * to_drop, * to_echo; + u32 n_left_from, n_left_to_drop, n_left_to_echo; + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, RTT_TEST_RX_NEXT_DROP, to_drop, n_left_to_drop); + vlib_get_next_frame (vm, node, RTT_TEST_RX_NEXT_ECHO, to_echo, n_left_to_echo); + + while (n_left_from > 0 && n_left_to_drop > 0 && n_left_to_echo > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + rtt_test_header_t * r0; + rtt_test_packet_t * ack0; + ip_csum_t sum0; + u32 bi0; + + bi0 = to_drop[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_drop += 1; + n_left_to_drop -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + + r0 = ip4_next_header (ip0); + + p0->error = node->errors[RTT_TEST_ERROR_listener_acks_dropped]; + + ack0 = vlib_packet_template_get_packet (vm, &tm->ack_packet_template, to_echo); + + to_echo += 1; + n_left_to_echo -= 1; + + sum0 = tm->ack_packet_template_ip4_checksum; + + ack0->ip4.src_address = ip0->dst_address; + sum0 = ip_csum_add_even (sum0, ack0->ip4.src_address.as_u32); + + ack0->ip4.dst_address = ip0->src_address; + sum0 = ip_csum_add_even (sum0, ack0->ip4.dst_address.as_u32); + + ack0->ip4.checksum = ip_csum_fold (sum0); + + ASSERT (ack0->ip4.checksum == ip4_header_checksum (&ack0->ip4)); + + ack0->rtt = r0[0]; + } + + vlib_put_next_frame (vm, node, RTT_TEST_RX_NEXT_DROP, n_left_to_drop); + vlib_put_next_frame (vm, node, RTT_TEST_RX_NEXT_ECHO, n_left_to_echo); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (rtt_test_rx_data_node) = { + .function = rtt_test_rx_data, + .name = "rtt-test-rx-data", + + .vector_size = sizeof (u32), + + .n_next_nodes = RTT_TEST_RX_N_NEXT, + .next_nodes = { + [RTT_TEST_RX_NEXT_DROP] = "error-drop", + [RTT_TEST_RX_NEXT_ECHO] = "ip4-input-no-checksum", + }, + + .n_errors = RTT_TEST_N_ERROR, + .error_strings = rtt_test_error_strings, +}; + +static uword +rtt_test_rx_ack (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + rtt_test_main_t * tm = &rtt_test_main; + uword n_packets = frame->n_vectors; + u32 * from, * to_drop; + u32 n_left_from, n_left_to_drop; + f64 now = vlib_time_now (vm); + 
vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, rtt_test_rx_data_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, RTT_TEST_RX_NEXT_DROP, to_drop, n_left_to_drop); + + while (n_left_from > 0 && n_left_to_drop > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + rtt_test_header_t * r0; + rtt_test_stream_t * s0; + u32 bi0, i0; + u64 rseq0, eseq0; + + i0 = 0; + bi0 = to_drop[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_drop += 1; + n_left_to_drop -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + + r0 = ip4_next_header (ip0); + + p0->error = error_node->errors[RTT_TEST_ERROR_listener_acks_dropped]; + + if (pool_is_free_index (tm->stream_pool, r0->stream_index)) + goto bad_stream_x1; + + s0 = pool_elt_at_index (tm->stream_pool, r0->stream_index); + + rseq0 = r0->sequence_number; + eseq0 = s0->rx_expected_sequence_number; + + if (rseq0 != eseq0) + goto out_of_sequence_x1; + + s0->rx_expected_sequence_number = rseq0 + 1; + s0->n_packets_received[1] += 1; + + vec_add1 (s0->rx_ack_dts, now - r0->time_stamp); + _vec_len (s0->rx_ack_dts) -= _vec_len (s0->rx_ack_dts) >= s0->max_n_rx_ack_dts; + + i0 = rseq0 != 0; + s0->rx_ack_times[i0] = now; + continue; + + bad_stream_x1: + { + ELOG_TYPE_DECLARE (e) = { + .format = "rtt-test: unknown stream %d", + .format_args = "i4", + }; + struct { u32 stream; } * ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->stream = r0->stream_index; + } + continue; + + out_of_sequence_x1: + i0 = (r0->sequence_number < s0->rx_expected_sequence_number + ? 0 + : (i0 ? 1 : 2)); + if (i0 != 1) + { + ELOG_TYPE_DECLARE (e) = { + .format = "rtt-test: out-of-seq expected %Ld got %Ld", + .format_args = "i8i8", + }; + struct { u64 expected, got; } * ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->expected = s0->rx_expected_sequence_number; + ed->got = r0->sequence_number; + } + + s0->rx_expected_sequence_number = i0 > 0 ? 
r0->sequence_number + 1 : s0->rx_expected_sequence_number; + + s0->n_packets_received[i0] += 1; + + i0 = r0->sequence_number > 0; + s0->rx_ack_times[i0] = now; + } + + vlib_put_next_frame (vm, node, RTT_TEST_RX_NEXT_DROP, n_left_to_drop); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (rtt_test_rx_ack_node) = { + .function = rtt_test_rx_ack, + .name = "rtt-test-rx-ack", + + .vector_size = sizeof (u32), + + .n_next_nodes = RTT_TEST_RX_N_NEXT, + .next_nodes = { + [RTT_TEST_RX_NEXT_DROP] = "error-drop", + [RTT_TEST_RX_NEXT_ECHO] = "ip4-input-no-checksum", + }, +}; + +always_inline void +rtt_test_tx_packets (vlib_main_t * vm, + vlib_node_runtime_t * node, + rtt_test_stream_t * s, + f64 time_now, + uword n_packets_to_send) +{ + u32 * to_next, n_this_frame, n_left, n_trace, next, i; + rtt_test_packet_t * p; + vlib_buffer_t * b; + + next = 0; + while (n_packets_to_send > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left); + + n_this_frame = clib_min (n_packets_to_send, n_left); + + for (i = 0; i < n_this_frame; i++) + { + p = vlib_packet_template_get_packet (vm, &s->packet_template, to_next + i); + p->rtt.time_stamp = time_now; + p->rtt.sequence_number = s->n_packets_sent + i; + } + + n_trace = vlib_get_trace_count (vm, node); + if (n_trace > 0) + { + u32 n = clib_min (n_trace, n_this_frame); + + vlib_set_trace_count (vm, node, n_trace - n); + for (i = 0; i < n_this_frame; i++) + { + b = vlib_get_buffer (vm, to_next[i]); + vlib_trace_buffer (vm, node, next, b, /* follow_chain */ 1); + } + } + + s->n_packets_sent += n_this_frame; + n_packets_to_send -= n_this_frame; + n_left -= n_this_frame; + + vlib_put_next_frame (vm, node, next, n_left); + } +} + +always_inline uword +rtt_test_stream_is_done (rtt_test_stream_t * s, f64 time_now) +{ + /* Need to send more packets? */ + if (s->n_packets_to_send > 0 && s->n_packets_sent < s->n_packets_to_send) + return 0; + + /* Received everything we've sent? */ + if (s->n_packets_received[0] + s->n_packets_received[1] + s->n_packets_received[2] >= s->n_packets_to_send) + return 1; + + /* No ACK received after 5 seconds of sending. */ + if (s->rx_ack_times[0] == 0 + && s->n_packets_sent > 0 + && time_now - s->tx_time_stream_created > 5) + return 1; + + /* No ACK received after 5 seconds of waiting? */ + if (time_now - s->rx_ack_times[1] > 5) + return 1; + + return 0; +} + +static_always_inline uword +rtt_test_tx_stream (vlib_main_t * vm, + vlib_node_runtime_t * node, + rtt_test_stream_t * s) +{ + rtt_test_main_t * tm = &rtt_test_main; + uword n_packets; + f64 time_now, dt; + + time_now = vlib_time_now (vm); + + if (rtt_test_stream_is_done (s, time_now)) + { + { + ELOG_TYPE_DECLARE (e) = { + .format = "rtt-test: done stream %d", + .format_args = "i4", + }; + struct { u32 stream_index; } * ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->stream_index = s - tm->stream_pool; + } + + rtt_test_stream_free (vm, tm, s); + if (pool_elts (tm->stream_pool) == 0) + vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_DISABLED); + return 0; + } + + /* Apply rate limit. */ + dt = time_now - s->tx_time_last_sent; + s->tx_time_last_sent = time_now; + + n_packets = VLIB_FRAME_SIZE; + if (s->send_rate_packets_per_second > 0) + { + s->packet_accumulator += dt * s->send_rate_packets_per_second; + n_packets = s->packet_accumulator; + + /* Never allow accumulator to grow if we get behind. */ + s->packet_accumulator -= n_packets; + } + + /* Apply fixed limit. 
*/ + if (s->n_packets_to_send > 0 + && s->n_packets_sent + n_packets > s->n_packets_to_send) + n_packets = s->n_packets_to_send - s->n_packets_sent; + + /* Generate up to one frame's worth of packets. */ + if (n_packets > VLIB_FRAME_SIZE) + n_packets = VLIB_FRAME_SIZE; + + if (n_packets > 0) + rtt_test_tx_packets (vm, node, s, time_now, n_packets); + + return n_packets; +} + +static uword +rtt_test_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + rtt_test_main_t * tm = &rtt_test_main; + rtt_test_stream_t * s; + uword n_packets = 0; + + pool_foreach (s, tm->stream_pool, ({ + n_packets += rtt_test_tx_stream (vm, node, s); + })); + + return n_packets; +} + +VLIB_REGISTER_NODE (rtt_test_tx_node) = { + .function = rtt_test_tx, + .name = "rtt-test-tx", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "ip4-input-no-checksum", + }, +}; + +static void rtt_test_stream_compute (rtt_test_main_t * tm, rtt_test_stream_t * s) +{ + int i; + + /* Compute average and standard deviation of RTT time. */ + if (vec_len (s->rx_ack_dts) == 0) + return; + + { + f64 c = vec_len (s->rx_ack_dts); + + s->ave_rtt = s->rms_rtt = 0; + vec_foreach_index (i, s->rx_ack_dts) + { + f64 dt = s->rx_ack_dts[i]; + s->ave_rtt += dt; + s->rms_rtt += dt*dt; + } + s->ave_rtt /= c; + s->rms_rtt = sqrt (s->rms_rtt / c - s->ave_rtt*s->ave_rtt); + s->rtt_count = c; + } + + if (! tm->rms_histogram_units) + tm->rms_histogram_units = .1; + + /* Generate historgram. */ + vec_foreach_index (i, s->rx_ack_dts) + { + i32 bin = flt_round_nearest ((s->rx_ack_dts[i] - s->ave_rtt) / (tm->rms_histogram_units * s->rms_rtt)); + u32 ib = bin < 0 ? 2*(-bin) + 1 : 2 *bin; + vec_validate (s->rtt_histogram, ib); + s->rtt_histogram[ib] += 1; + } + + if (s->n_packets_sent >= s->n_packets_to_send) + vec_free (s->rx_ack_dts); +} + +static clib_error_t * +do_plot_stream (rtt_test_main_t * tm, rtt_test_stream_t * s, char * file_name, int n) +{ + FILE * out; + char * f; + clib_error_t * error = 0; + u32 i; + + f = (char *) format (0, "%s.%d%c", file_name, n, 0); + out = fopen (f, "w"); + + if (! out) + { + error = clib_error_return_unix (0, "open `%s'", f); + goto done; + } + + rtt_test_stream_compute (tm, s); + vec_foreach_index (i, s->rtt_histogram) + { + if (s->rtt_histogram[i] > 0) + { + i32 bi = (i & 1) ? 
-(i/2) : (i/2); + f64 dt = s->ave_rtt + (bi * tm->rms_histogram_units * s->rms_rtt); + fformat (out, "%.6e %.6e\n", + dt, s->rtt_histogram[i] / s->rtt_count); + } + } + clib_warning ("wrote `%s'", f); + + done: + vec_free (f); + fclose (out); + return error; +} + +static clib_error_t * +do_plot (rtt_test_main_t * tm, char * file_name) +{ + rtt_test_stream_t * s; + clib_error_t * error = 0; + int i, n; + + n = 0; + for (i = 0; i < ARRAY_LEN (tm->stream_history); i++) + { + s = tm->stream_history + i; + if (s->n_packets_sent > 0) + { + error = do_plot_stream (tm, s, file_name, n++); + if (error) + return error; + } + } + + pool_foreach (s, tm->stream_pool, ({ + error = do_plot_stream (tm, s, file_name, n++); + if (error) + return error; + })); + + return error; +} + +static clib_error_t * +rtt_test_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + rtt_test_main_t * tm = &rtt_test_main; + rtt_test_stream_t * s; + + { + char * file_name; + + if (unformat (input, "plot %s", &file_name)) + { + clib_error_t * e = do_plot (tm, file_name); + vec_free (file_name); + return e; + } + } + + pool_get (tm->stream_pool, s); + + memset (s, 0, sizeof (s[0])); + s->n_packets_to_send = 1; + s->send_rate_bits_per_second = 1e6; + s->n_bytes_payload = 1448; + s->max_n_rx_ack_dts = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U -> %U", + unformat_ip4_address, &s->src_address, + unformat_ip4_address, &s->dst_address)) + ; + else if (unformat (input, "count %f", &s->n_packets_to_send)) + ; + else if (unformat (input, "hist %d", &s->max_n_rx_ack_dts)) + ; + else if (unformat (input, "rate %f", &s->send_rate_bits_per_second)) + ; + else if (unformat (input, "size %d", &s->n_bytes_payload)) + ; + else + return clib_error_return (0, "parse error: %U", format_unformat_error, input); + } + + if (pool_elts (tm->stream_pool) == 1) + vlib_node_set_state (vm, rtt_test_tx_node.index, VLIB_NODE_STATE_POLLING); + + if (! 
s->max_n_rx_ack_dts) + s->max_n_rx_ack_dts = s->n_packets_to_send; + vec_validate (s->rx_ack_dts, s->max_n_rx_ack_dts - 1); + _vec_len (s->rx_ack_dts) = 0; + + s->tx_time_stream_created = vlib_time_now (vm); + s->tx_time_last_sent = s->tx_time_stream_created; + s->n_bytes_per_packet_on_wire + = (s->n_bytes_payload + + sizeof (rtt_test_header_t) + + sizeof (ip4_header_t) + + tm->n_encap_bytes); + + s->send_rate_packets_per_second = s->send_rate_bits_per_second / (s->n_bytes_per_packet_on_wire * BITS (u8)); + + { + rtt_test_packet_t * t; + int i; + + t = clib_mem_alloc_no_fail (sizeof (t[0]) + s->n_bytes_payload); + memset (t, 0, sizeof (t[0])); + + t->ip4.ip_version_and_header_length = 0x45; + t->ip4.length = clib_host_to_net_u16 (sizeof (t[0]) + s->n_bytes_payload); + t->ip4.flags_and_fragment_offset = clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + t->ip4.protocol = RTT_TEST_IP_PROTOCOL_DATA; + t->ip4.ttl = 64; + + t->ip4.src_address = s->src_address; + t->ip4.dst_address = s->dst_address; + + t->ip4.checksum = ip4_header_checksum (&t->ip4); + + t->rtt.stream_index = s - tm->stream_pool; + + for (i = 0; i < s->n_bytes_payload; i++) + t->payload[i] = i; + + vlib_packet_template_init (vm, &s->packet_template, + t, sizeof (t[0]) + s->n_bytes_payload, + /* alloc chunk size */ VLIB_FRAME_SIZE, + "rtt-test stream %d data", s - tm->stream_pool); + + clib_mem_free (t); + } + + { + ELOG_TYPE_DECLARE (e) = { + .format = "rtt-test: start stream %d", + .format_args = "i4", + }; + struct { u32 stream_index; } * ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->stream_index = s - tm->stream_pool; + } + + return 0; +} + +VLIB_CLI_COMMAND (rtt_test_cli_command, static) = { + .path = "test rtt", + .short_help = "Measure RTT test protocol", + .function = rtt_test_command, +}; + +static u8 * format_rtt_test_stream (u8 * s, va_list * args) +{ + rtt_test_stream_t * t = va_arg (*args, rtt_test_stream_t *); + uword indent = format_get_indent (s); + + s = format (s, "%U -> %U", + format_ip4_address, &t->src_address, + format_ip4_address, &t->dst_address); + + s = format (s, "\n%U sent %Ld, received: from-past %Ld in-sequence %Ld from-future %Ld", + format_white_space, indent, + t->n_packets_sent, + t->n_packets_received[0], t->n_packets_received[1], t->n_packets_received[2]); + + s = format (s, "\n%U rx-rate %.4e bits/sec", + format_white_space, indent, + (((f64) (t->n_packets_received[0] + t->n_packets_received[1] + t->n_packets_received[2]) * (f64) t->n_bytes_per_packet_on_wire * BITS (u8)) + / (t->rx_ack_times[1] - t->rx_ack_times[0]))); + + rtt_test_stream_compute (&rtt_test_main, t); + + s = format (s, "\n%U rtt %.4e +- %.4e", + format_white_space, indent, + t->ave_rtt, t->rms_rtt); + + return s; +} + +static clib_error_t * +rtt_show_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + rtt_test_main_t * tm = &rtt_test_main; + rtt_test_stream_t * s; + int i; + + for (i = 0; i < ARRAY_LEN (tm->stream_history); i++) + { + s = tm->stream_history + i; + if (s->n_packets_sent > 0) + vlib_cli_output (vm, "%U", format_rtt_test_stream, s); + } + + pool_foreach (s, tm->stream_pool, ({ + vlib_cli_output (vm, "%U", format_rtt_test_stream, s); + })); + + return 0; +} + +VLIB_CLI_COMMAND (rtt_show_cli_command, static) = { + .path = "show rtt", + .short_help = "Show RTT measurements", + .function = rtt_show_command, +}; + +static clib_error_t * +rtt_test_init (vlib_main_t * vm) +{ + rtt_test_main_t * tm = &rtt_test_main; + + ip4_register_protocol (RTT_TEST_IP_PROTOCOL_DATA, 
rtt_test_rx_data_node.index); + ip4_register_protocol (RTT_TEST_IP_PROTOCOL_ACK, rtt_test_rx_ack_node.index); + + { + rtt_test_packet_t ack; + + memset (&ack, 0, sizeof (ack)); + + ack.ip4.ip_version_and_header_length = 0x45; + ack.ip4.length = clib_host_to_net_u16 (sizeof (ack)); + ack.ip4.flags_and_fragment_offset = clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + ack.ip4.protocol = RTT_TEST_IP_PROTOCOL_ACK; + ack.ip4.ttl = 64; + + ack.ip4.checksum = ip4_header_checksum (&ack.ip4); + tm->ack_packet_template_ip4_checksum = ack.ip4.checksum; + + vlib_packet_template_init (vm, &tm->ack_packet_template, + &ack, + sizeof (ack), + /* alloc chunk size */ VLIB_FRAME_SIZE, + "rtt-test ack"); + } + + return /* no error */ 0; +} + +static VLIB_INIT_FUNCTION (rtt_test_init); + +static clib_error_t * +rtt_test_config (vlib_main_t * vm, unformat_input_t * input) +{ + rtt_test_main_t * tm = &rtt_test_main; + clib_error_t * error = 0; + + tm->rms_histogram_units = .1; + tm->n_encap_bytes = + (14 /* ethernet header */ + + 8 /* preamble */ + + 12 /* inter packet gap */ + + 4 /* crc */); + tm->verbose = 1; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "rms-histogram-units %f", &tm->rms_histogram_units)) + ; + else if (unformat (input, "silent")) + tm->verbose = 0; + else + clib_error ("%U", format_unformat_error, input); + } + + return error; +} + +VLIB_CONFIG_FUNCTION (rtt_test_config, "rtt-test"); diff --git a/vnet/install-sh b/vnet/install-sh new file mode 100755 index 00000000000..377bb8687ff --- /dev/null +++ b/vnet/install-sh @@ -0,0 +1,527 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2011-11-20.07; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. 
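The rtt-test driver above exposes three user-visible knobs: the "test rtt" CLI (with the src -> dst, count, rate, size and hist keywords parsed in rtt_test_command), the "show rtt" display, and the "rtt-test" startup-config section handled by rtt_test_config. A minimal usage sketch follows; the addresses, rates and counts are illustrative only, not values taken from this commit:

  # startup.conf fragment: narrow the histogram bin width and disable verbose output
  rtt-test {
    rms-histogram-units 0.05
    silent
  }

  # interactive CLI: pace 10000 copies of a 1448-byte payload at ~1 Mbit/s,
  # keeping one RTT sample slot per packet, then inspect and dump the histogram
  test rtt 10.0.0.1 -> 10.0.0.2 count 10000 rate 1e6 size 1448 hist 10000
  show rtt
  test rtt plot /tmp/rtt-hist     # writes two-column files /tmp/rtt-hist.0, .1, ...

Since "rate" is given in bits per second, the pacing works out to packets/sec = bits/sec / (bytes-on-wire * 8), where bytes-on-wire is the payload plus the RTT and IPv4 headers plus the 38 bytes of Ethernet overhead (14 header + 8 preamble + 12 inter-packet gap + 4 CRC) assumed by rtt_test_config; at 1 Mbit/s and roughly 1500 bytes on the wire that is on the order of 80 packets per second.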
+# +# This script is compatible with the BSD install script, but was written +# from scratch. + +nl=' +' +IFS=" "" $nl" + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit=${DOITPROG-} +if test -z "$doit"; then + doit_exec=exec +else + doit_exec=$doit +fi + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_glob='?' +initialize_posix_glob=' + test "$posix_glob" != "?" || { + if (set -f) 2>/dev/null; then + posix_glob= + else + posix_glob=: + fi + } +' + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +no_target_directory= + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *' '* | *' +'* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) no_target_directory=true;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. 
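As a brief sketch of how this helper is normally driven (file names, modes and destinations here are illustrative, but the options are the ones documented in the usage text above):

  # 1st form: copy one file to an explicit destination with mode 644
  ./install-sh -c -m 644 vnet/vnet.h /usr/local/include/vnet/vnet.h
  # 4th form: create a directory tree, much like `mkdir -p'
  ./install-sh -d /usr/local/include/vnet/l2
  # -C: only replace the destination when the contents actually differ
  ./install-sh -C -m 755 some-script /usr/local/bin/some-script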
+ exit 0 +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dst=$dstdir/`basename "$src"` + dstdir_status=0 + else + # Prefer dirname, but fall back on a substitute if dirname fails. + dstdir=` + (dirname "$dst") 2>/dev/null || + expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$dst" : 'X\(//\)[^/]' \| \ + X"$dst" : 'X\(//\)$' \| \ + X"$dst" : 'X\(/\)' \| . 2>/dev/null || + echo X"$dst" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q' + ` + + test -d "$dstdir" + dstdir_status=$? + fi + fi + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 + + if (umask $mkdir_umask && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 
+ ls_ld_tmpdir=`ls -ld "$tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/d" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + eval "$initialize_posix_glob" + + oIFS=$IFS + IFS=/ + $posix_glob set -f + set fnord $dstdir + shift + $posix_glob set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + + eval "$initialize_posix_glob" && + $posix_glob set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + $posix_glob set +f && + + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. 
+ $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/vnet/ltmain.sh b/vnet/ltmain.sh new file mode 100644 index 00000000000..a356acafa45 --- /dev/null +++ b/vnet/ltmain.sh @@ -0,0 +1,9661 @@ + +# libtool (GNU libtool) 2.4.2 +# Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996 + +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, +# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, +# or obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# Usage: $progname [OPTION]... [MODE-ARG]... +# +# Provide generalized library-building support services. 
+# +# --config show all configuration variables +# --debug enable verbose shell tracing +# -n, --dry-run display commands without modifying any files +# --features display basic configuration information and exit +# --mode=MODE use operation mode MODE +# --preserve-dup-deps don't remove duplicate dependency libraries +# --quiet, --silent don't print informational messages +# --no-quiet, --no-silent +# print informational messages (default) +# --no-warn don't display warning messages +# --tag=TAG use configuration variables from tag TAG +# -v, --verbose print more informational messages than default +# --no-verbose don't print the extra informational messages +# --version print version information +# -h, --help, --help-all print short, long, or detailed help message +# +# MODE must be one of the following: +# +# clean remove files from the build directory +# compile compile a source file into a libtool object +# execute automatically set library path, then run a program +# finish complete the installation of libtool libraries +# install install libraries or executables +# link create a library or an executable +# uninstall remove libraries from an installed directory +# +# MODE-ARGS vary depending on the MODE. When passed as first option, +# `--mode=MODE' may be abbreviated as `MODE' or a unique abbreviation of that. +# Try `$progname --help --mode=MODE' for a more detailed description of MODE. +# +# When reporting a bug, please describe a test case to reproduce it and +# include the following information: +# +# host-triplet: $host +# shell: $SHELL +# compiler: $LTCC +# compiler flags: $LTCFLAGS +# linker: $LD (gnu? $with_gnu_ld) +# $progname: (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1 +# automake: $automake_version +# autoconf: $autoconf_version +# +# Report bugs to <bug-libtool@gnu.org>. +# GNU libtool home page: <http://www.gnu.org/software/libtool/>. +# General help using GNU software: <http://www.gnu.org/gethelp/>. + +PROGRAM=libtool +PACKAGE=libtool +VERSION="2.4.2 Debian-2.4.2-1.7ubuntu1" +TIMESTAMP="" +package_revision=1.3337 + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' +} + +# NLS nuisances: We save the old values to restore during execute mode. +lt_user_locale= +lt_safe_locale= +for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES +do + eval "if test \"\${$lt_var+set}\" = set; then + save_$lt_var=\$$lt_var + $lt_var=C + export $lt_var + lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\" + lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\" + fi" +done +LC_ALL=C +LANGUAGE=C +export LANGUAGE LC_ALL + +$lt_unset CDPATH + + +# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh +# is ksh but when the shell is invoked as "sh" and the current value of +# the _XPG environment variable is not equal to 1 (one), the special +# positional parameter $0, within a function call, is the name of the +# function. 
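The mode list above maps onto the usual compile/link/install cycle; a hedged sketch of that flow follows (compiler flags, library names and the install prefix are illustrative, not taken from this tree's build, which drives libtool through automake):

  # compile a source file into a libtool object (.lo), building PIC and non-PIC variants
  ./libtool --mode=compile gcc -g -O -c vnet/config.c
  # link the objects into a libtool library, naming its eventual install directory
  ./libtool --mode=link gcc -g -O -o libvnet.la config.lo -rpath /usr/local/lib
  # install the result with the chosen install program
  ./libtool --mode=install install -c libvnet.la /usr/local/lib/libvnet.la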
+progpath="$0" + + + +: ${CP="cp -f"} +test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'} +: ${MAKE="make"} +: ${MKDIR="mkdir"} +: ${MV="mv -f"} +: ${RM="rm -f"} +: ${SHELL="${CONFIG_SHELL-/bin/sh}"} +: ${Xsed="$SED -e 1s/^X//"} + +# Global variables: +EXIT_SUCCESS=0 +EXIT_FAILURE=1 +EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing. +EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. + +exit_status=$EXIT_SUCCESS + +# Make sure IFS has a sensible default +lt_nl=' +' +IFS=" $lt_nl" + +dirname="s,/[^/]*$,," +basename="s,^.*/,," + +# func_dirname file append nondir_replacement +# Compute the dirname of FILE. If nonempty, add APPEND to the result, +# otherwise set result to NONDIR_REPLACEMENT. +func_dirname () +{ + func_dirname_result=`$ECHO "${1}" | $SED "$dirname"` + if test "X$func_dirname_result" = "X${1}"; then + func_dirname_result="${3}" + else + func_dirname_result="$func_dirname_result${2}" + fi +} # func_dirname may be replaced by extended shell implementation + + +# func_basename file +func_basename () +{ + func_basename_result=`$ECHO "${1}" | $SED "$basename"` +} # func_basename may be replaced by extended shell implementation + + +# func_dirname_and_basename file append nondir_replacement +# perform func_basename and func_dirname in a single function +# call: +# dirname: Compute the dirname of FILE. If nonempty, +# add APPEND to the result, otherwise set result +# to NONDIR_REPLACEMENT. +# value returned in "$func_dirname_result" +# basename: Compute filename of FILE. +# value retuned in "$func_basename_result" +# Implementation must be kept synchronized with func_dirname +# and func_basename. For efficiency, we do not delegate to +# those functions but instead duplicate the functionality here. +func_dirname_and_basename () +{ + # Extract subdirectory from the argument. + func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"` + if test "X$func_dirname_result" = "X${1}"; then + func_dirname_result="${3}" + else + func_dirname_result="$func_dirname_result${2}" + fi + func_basename_result=`$ECHO "${1}" | $SED -e "$basename"` +} # func_dirname_and_basename may be replaced by extended shell implementation + + +# func_stripname prefix suffix name +# strip PREFIX and SUFFIX off of NAME. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# func_strip_suffix prefix name +func_stripname () +{ + case ${2} in + .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; + *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; + esac +} # func_stripname may be replaced by extended shell implementation + + +# These SED scripts presuppose an absolute path with a trailing slash. +pathcar='s,^/\([^/]*\).*$,\1,' +pathcdr='s,^/[^/]*,,' +removedotparts=':dotsl + s@/\./@/@g + t dotsl + s,/\.$,/,' +collapseslashes='s@/\{1,\}@/@g' +finalslash='s,/*$,/,' + +# func_normal_abspath PATH +# Remove doubled-up and trailing slashes, "." path components, +# and cancel out any ".." path components in PATH after making +# it an absolute path. +# value returned in "$func_normal_abspath_result" +func_normal_abspath () +{ + # Start from root dir and reassemble the path. + func_normal_abspath_result= + func_normal_abspath_tpath=$1 + func_normal_abspath_altnamespace= + case $func_normal_abspath_tpath in + "") + # Empty path, that just means $cwd. 
+ func_stripname '' '/' "`pwd`" + func_normal_abspath_result=$func_stripname_result + return + ;; + # The next three entries are used to spot a run of precisely + # two leading slashes without using negated character classes; + # we take advantage of case's first-match behaviour. + ///*) + # Unusual form of absolute path, do nothing. + ;; + //*) + # Not necessarily an ordinary path; POSIX reserves leading '//' + # and for example Cygwin uses it to access remote file shares + # over CIFS/SMB, so we conserve a leading double slash if found. + func_normal_abspath_altnamespace=/ + ;; + /*) + # Absolute path, do nothing. + ;; + *) + # Relative path, prepend $cwd. + func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath + ;; + esac + # Cancel out all the simple stuff to save iterations. We also want + # the path to end with a slash for ease of parsing, so make sure + # there is one (and only one) here. + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$removedotparts" -e "$collapseslashes" -e "$finalslash"` + while :; do + # Processed it all yet? + if test "$func_normal_abspath_tpath" = / ; then + # If we ascended to the root using ".." the result may be empty now. + if test -z "$func_normal_abspath_result" ; then + func_normal_abspath_result=/ + fi + break + fi + func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$pathcar"` + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$pathcdr"` + # Figure out what to do with it + case $func_normal_abspath_tcomponent in + "") + # Trailing empty path component, ignore it. + ;; + ..) + # Parent dir; strip last assembled component from result. + func_dirname "$func_normal_abspath_result" + func_normal_abspath_result=$func_dirname_result + ;; + *) + # Actual path component, append it. + func_normal_abspath_result=$func_normal_abspath_result/$func_normal_abspath_tcomponent + ;; + esac + done + # Restore leading double-slash if one was found on entry. + func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result +} + +# func_relative_path SRCDIR DSTDIR +# generates a relative path from SRCDIR to DSTDIR, with a trailing +# slash if non-empty, suitable for immediately appending a filename +# without needing to append a separator. +# value returned in "$func_relative_path_result" +func_relative_path () +{ + func_relative_path_result= + func_normal_abspath "$1" + func_relative_path_tlibdir=$func_normal_abspath_result + func_normal_abspath "$2" + func_relative_path_tbindir=$func_normal_abspath_result + + # Ascend the tree starting from libdir + while :; do + # check if we have found a prefix of bindir + case $func_relative_path_tbindir in + $func_relative_path_tlibdir) + # found an exact match + func_relative_path_tcancelled= + break + ;; + $func_relative_path_tlibdir*) + # found a matching prefix + func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir" + func_relative_path_tcancelled=$func_stripname_result + if test -z "$func_relative_path_result"; then + func_relative_path_result=. + fi + break + ;; + *) + func_dirname $func_relative_path_tlibdir + func_relative_path_tlibdir=${func_dirname_result} + if test "x$func_relative_path_tlibdir" = x ; then + # Have to descend all the way to the root! 
+ func_relative_path_result=../$func_relative_path_result + func_relative_path_tcancelled=$func_relative_path_tbindir + break + fi + func_relative_path_result=../$func_relative_path_result + ;; + esac + done + + # Now calculate path; take care to avoid doubling-up slashes. + func_stripname '' '/' "$func_relative_path_result" + func_relative_path_result=$func_stripname_result + func_stripname '/' '/' "$func_relative_path_tcancelled" + if test "x$func_stripname_result" != x ; then + func_relative_path_result=${func_relative_path_result}/${func_stripname_result} + fi + + # Normalisation. If bindir is libdir, return empty string, + # else relative path ending with a slash; either way, target + # file name can be directly appended. + if test ! -z "$func_relative_path_result"; then + func_stripname './' '' "$func_relative_path_result/" + func_relative_path_result=$func_stripname_result + fi +} + +# The name of this program: +func_dirname_and_basename "$progpath" +progname=$func_basename_result + +# Make sure we have an absolute path for reexecution: +case $progpath in + [\\/]*|[A-Za-z]:\\*) ;; + *[\\/]*) + progdir=$func_dirname_result + progdir=`cd "$progdir" && pwd` + progpath="$progdir/$progname" + ;; + *) + save_IFS="$IFS" + IFS=${PATH_SEPARATOR-:} + for progdir in $PATH; do + IFS="$save_IFS" + test -x "$progdir/$progname" && break + done + IFS="$save_IFS" + test -n "$progdir" || progdir=`pwd` + progpath="$progdir/$progname" + ;; +esac + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed="${SED}"' -e 1s/^X//' +sed_quote_subst='s/\([`"$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution that turns a string into a regex matching for the +# string literally. +sed_make_literal_regex='s,[].[^$\\*\/],\\&,g' + +# Sed substitution that converts a w32 file name or path +# which contains forward slashes, into one that contains +# (escaped) backslashes. A very naive implementation. +lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g' + +# Re-`\' parameter expansions in output of double_quote_subst that were +# `\'-ed in input to the same. If an odd number of `\' preceded a '$' +# in input to double_quote_subst, that '$' was protected from expansion. +# Since each input `\' is now two `\'s, look for any number of runs of +# four `\'s followed by two `\'s and then a '$'. `\' that '$'. +bs='\\' +bs2='\\\\' +bs4='\\\\\\\\' +dollar='\$' +sed_double_backslash="\ + s/$bs4/&\\ +/g + s/^$bs2$dollar/$bs&/ + s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g + s/\n//g" + +# Standard options: +opt_dry_run=false +opt_help=false +opt_quiet=false +opt_verbose=false +opt_warning=: + +# func_echo arg... +# Echo program name prefixed message, along with the current mode +# name if it has been set yet. +func_echo () +{ + $ECHO "$progname: ${opt_mode+$opt_mode: }$*" +} + +# func_verbose arg... +# Echo program name prefixed message in verbose mode only. +func_verbose () +{ + $opt_verbose && func_echo ${1+"$@"} + + # A bug in bash halts the script if the last line of a function + # fails when set -e is in force, so we need another command to + # work around that: + : +} + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + +# func_error arg... +# Echo program name prefixed message to standard error. 
+func_error () +{ + $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2 +} + +# func_warning arg... +# Echo program name prefixed warning message to standard error. +func_warning () +{ + $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2 + + # bash bug again: + : +} + +# func_fatal_error arg... +# Echo program name prefixed message to standard error, and exit. +func_fatal_error () +{ + func_error ${1+"$@"} + exit $EXIT_FAILURE +} + +# func_fatal_help arg... +# Echo program name prefixed message to standard error, followed by +# a help hint, and exit. +func_fatal_help () +{ + func_error ${1+"$@"} + func_fatal_error "$help" +} +help="Try \`$progname --help' for more information." ## default + + +# func_grep expression filename +# Check whether EXPRESSION matches any line of FILENAME, without output. +func_grep () +{ + $GREP "$1" "$2" >/dev/null 2>&1 +} + + +# func_mkdir_p directory-path +# Make sure the entire path to DIRECTORY-PATH is available. +func_mkdir_p () +{ + my_directory_path="$1" + my_dir_list= + + if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then + + # Protect directory names starting with `-' + case $my_directory_path in + -*) my_directory_path="./$my_directory_path" ;; + esac + + # While some portion of DIR does not yet exist... + while test ! -d "$my_directory_path"; do + # ...make a list in topmost first order. Use a colon delimited + # list incase some portion of path contains whitespace. + my_dir_list="$my_directory_path:$my_dir_list" + + # If the last portion added has no slash in it, the list is done + case $my_directory_path in */*) ;; *) break ;; esac + + # ...otherwise throw away the child directory and loop + my_directory_path=`$ECHO "$my_directory_path" | $SED -e "$dirname"` + done + my_dir_list=`$ECHO "$my_dir_list" | $SED 's,:*$,,'` + + save_mkdir_p_IFS="$IFS"; IFS=':' + for my_dir in $my_dir_list; do + IFS="$save_mkdir_p_IFS" + # mkdir can fail with a `File exist' error if two processes + # try to create one of the directories concurrently. Don't + # stop in that case! + $MKDIR "$my_dir" 2>/dev/null || : + done + IFS="$save_mkdir_p_IFS" + + # Bail out if we (or some other process) failed to create a directory. + test -d "$my_directory_path" || \ + func_fatal_error "Failed to create \`$1'" + fi +} + + +# func_mktempdir [string] +# Make a temporary directory that won't clash with other running +# libtool processes, and avoids race conditions if possible. If +# given, STRING is the basename for that directory. +func_mktempdir () +{ + my_template="${TMPDIR-/tmp}/${1-$progname}" + + if test "$opt_dry_run" = ":"; then + # Return a directory name, but don't create it in dry-run mode + my_tmpdir="${my_template}-$$" + else + + # If mktemp works, use that first and foremost + my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null` + + if test ! -d "$my_tmpdir"; then + # Failing that, at least try and use $RANDOM to avoid a race + my_tmpdir="${my_template}-${RANDOM-0}$$" + + save_mktempdir_umask=`umask` + umask 0077 + $MKDIR "$my_tmpdir" + umask $save_mktempdir_umask + fi + + # If we're not in dry-run mode, bomb out on failure + test -d "$my_tmpdir" || \ + func_fatal_error "cannot create temporary directory \`$my_tmpdir'" + fi + + $ECHO "$my_tmpdir" +} + + +# func_quote_for_eval arg +# Aesthetically quote ARG to be evaled later. 
+# This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT +# is double-quoted, suitable for a subsequent eval, whereas +# FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters +# which are still active within double quotes backslashified. +func_quote_for_eval () +{ + case $1 in + *[\\\`\"\$]*) + func_quote_for_eval_unquoted_result=`$ECHO "$1" | $SED "$sed_quote_subst"` ;; + *) + func_quote_for_eval_unquoted_result="$1" ;; + esac + + case $func_quote_for_eval_unquoted_result in + # Double-quote args containing shell metacharacters to delay + # word splitting, command substitution and and variable + # expansion for a subsequent eval. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\"" + ;; + *) + func_quote_for_eval_result="$func_quote_for_eval_unquoted_result" + esac +} + + +# func_quote_for_expand arg +# Aesthetically quote ARG to be evaled later; same as above, +# but do not quote variable references. +func_quote_for_expand () +{ + case $1 in + *[\\\`\"]*) + my_arg=`$ECHO "$1" | $SED \ + -e "$double_quote_subst" -e "$sed_double_backslash"` ;; + *) + my_arg="$1" ;; + esac + + case $my_arg in + # Double-quote args containing shell metacharacters to delay + # word splitting and command substitution for a subsequent eval. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + my_arg="\"$my_arg\"" + ;; + esac + + func_quote_for_expand_result="$my_arg" +} + + +# func_show_eval cmd [fail_exp] +# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is +# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP +# is given, then evaluate it. +func_show_eval () +{ + my_cmd="$1" + my_fail_exp="${2-:}" + + ${opt_silent-false} || { + func_quote_for_expand "$my_cmd" + eval "func_echo $func_quote_for_expand_result" + } + + if ${opt_dry_run-false}; then :; else + eval "$my_cmd" + my_status=$? + if test "$my_status" -eq 0; then :; else + eval "(exit $my_status); $my_fail_exp" + fi + fi +} + + +# func_show_eval_locale cmd [fail_exp] +# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is +# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP +# is given, then evaluate it. Use the saved locale for evaluation. +func_show_eval_locale () +{ + my_cmd="$1" + my_fail_exp="${2-:}" + + ${opt_silent-false} || { + func_quote_for_expand "$my_cmd" + eval "func_echo $func_quote_for_expand_result" + } + + if ${opt_dry_run-false}; then :; else + eval "$lt_user_locale + $my_cmd" + my_status=$? + eval "$lt_safe_locale" + if test "$my_status" -eq 0; then :; else + eval "(exit $my_status); $my_fail_exp" + fi + fi +} + +# func_tr_sh +# Turn $1 into a string suitable for a shell variable name. +# Result is stored in $func_tr_sh_result. All characters +# not in the set a-zA-Z0-9_ are replaced with '_'. Further, +# if $1 begins with a digit, a '_' is prepended as well. +func_tr_sh () +{ + case $1 in + [0-9]* | *[!a-zA-Z0-9_]*) + func_tr_sh_result=`$ECHO "$1" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'` + ;; + * ) + func_tr_sh_result=$1 + ;; + esac +} + + +# func_version +# Echo version message to standard output and exit. 
+func_version () +{ + $opt_debug + + $SED -n '/(C)/!b go + :more + /\./!{ + N + s/\n# / / + b more + } + :go + /^# '$PROGRAM' (GNU /,/# warranty; / { + s/^# // + s/^# *$// + s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/ + p + }' < "$progpath" + exit $? +} + +# func_usage +# Echo short help message to standard output and exit. +func_usage () +{ + $opt_debug + + $SED -n '/^# Usage:/,/^# *.*--help/ { + s/^# // + s/^# *$// + s/\$progname/'$progname'/ + p + }' < "$progpath" + echo + $ECHO "run \`$progname --help | more' for full usage" + exit $? +} + +# func_help [NOEXIT] +# Echo long help message to standard output and exit, +# unless 'noexit' is passed as argument. +func_help () +{ + $opt_debug + + $SED -n '/^# Usage:/,/# Report bugs to/ { + :print + s/^# // + s/^# *$// + s*\$progname*'$progname'* + s*\$host*'"$host"'* + s*\$SHELL*'"$SHELL"'* + s*\$LTCC*'"$LTCC"'* + s*\$LTCFLAGS*'"$LTCFLAGS"'* + s*\$LD*'"$LD"'* + s/\$with_gnu_ld/'"$with_gnu_ld"'/ + s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/ + s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/ + p + d + } + /^# .* home page:/b print + /^# General help using/b print + ' < "$progpath" + ret=$? + if test -z "$1"; then + exit $ret + fi +} + +# func_missing_arg argname +# Echo program name prefixed message to standard error and set global +# exit_cmd. +func_missing_arg () +{ + $opt_debug + + func_error "missing argument for $1." + exit_cmd=exit +} + + +# func_split_short_opt shortopt +# Set func_split_short_opt_name and func_split_short_opt_arg shell +# variables after splitting SHORTOPT after the 2nd character. +func_split_short_opt () +{ + my_sed_short_opt='1s/^\(..\).*$/\1/;q' + my_sed_short_rest='1s/^..\(.*\)$/\1/;q' + + func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"` + func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"` +} # func_split_short_opt may be replaced by extended shell implementation + + +# func_split_long_opt longopt +# Set func_split_long_opt_name and func_split_long_opt_arg shell +# variables after splitting LONGOPT at the `=' sign. +func_split_long_opt () +{ + my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q' + my_sed_long_arg='1s/^--[^=]*=//' + + func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"` + func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"` +} # func_split_long_opt may be replaced by extended shell implementation + +exit_cmd=: + + + + + +magic="%%%MAGIC variable%%%" +magic_exe="%%%MAGIC EXE variable%%%" + +# Global variables. +nonopt= +preserve_args= +lo2o="s/\\.lo\$/.${objext}/" +o2lo="s/\\.${objext}\$/.lo/" +extracted_archives= +extracted_serial=0 + +# If this variable is set in any of the actions, the command in it +# will be execed at the end. This prevents here-documents from being +# left over by shells. +exec_cmd= + +# func_append var value +# Append VALUE to the end of shell variable VAR. +func_append () +{ + eval "${1}=\$${1}\${2}" +} # func_append may be replaced by extended shell implementation + +# func_append_quoted var value +# Quote VALUE and append to the end of shell variable VAR, separated +# by a space. +func_append_quoted () +{ + func_quote_for_eval "${2}" + eval "${1}=\$${1}\\ \$func_quote_for_eval_result" +} # func_append_quoted may be replaced by extended shell implementation + + +# func_arith arithmetic-term... 
+func_arith () +{ + func_arith_result=`expr "${@}"` +} # func_arith may be replaced by extended shell implementation + + +# func_len string +# STRING may not start with a hyphen. +func_len () +{ + func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len` +} # func_len may be replaced by extended shell implementation + + +# func_lo2o object +func_lo2o () +{ + func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"` +} # func_lo2o may be replaced by extended shell implementation + + +# func_xform libobj-or-source +func_xform () +{ + func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'` +} # func_xform may be replaced by extended shell implementation + + +# func_fatal_configuration arg... +# Echo program name prefixed message to standard error, followed by +# a configuration failure hint, and exit. +func_fatal_configuration () +{ + func_error ${1+"$@"} + func_error "See the $PACKAGE documentation for more information." + func_fatal_error "Fatal configuration error." +} + + +# func_config +# Display the configuration for all the tags in this script. +func_config () +{ + re_begincf='^# ### BEGIN LIBTOOL' + re_endcf='^# ### END LIBTOOL' + + # Default configuration. + $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath" + + # Now print the configurations for the tags. + for tagname in $taglist; do + $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath" + done + + exit $? +} + +# func_features +# Display the features supported by this script. +func_features () +{ + echo "host: $host" + if test "$build_libtool_libs" = yes; then + echo "enable shared libraries" + else + echo "disable shared libraries" + fi + if test "$build_old_libs" = yes; then + echo "enable static libraries" + else + echo "disable static libraries" + fi + + exit $? +} + +# func_enable_tag tagname +# Verify that TAGNAME is valid, and either flag an error and exit, or +# enable the TAGNAME tag. We also add TAGNAME to the global $taglist +# variable here. +func_enable_tag () +{ + # Global variable: + tagname="$1" + + re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$" + re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$" + sed_extractcf="/$re_begincf/,/$re_endcf/p" + + # Validate tagname. + case $tagname in + *[!-_A-Za-z0-9,/]*) + func_fatal_error "invalid tag name: $tagname" + ;; + esac + + # Don't test for the "default" C tag, as we know it's + # there but not specially marked. + case $tagname in + CC) ;; + *) + if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then + taglist="$taglist $tagname" + + # Evaluate the configuration. Be careful to quote the path + # and the sed script, to avoid splitting on whitespace, but + # also don't use non-portable quotes within backquotes within + # quotes we have to do it in 2 steps: + extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"` + eval "$extractedcf" + else + func_error "ignoring unknown tag $tagname" + fi + ;; + esac +} + +# func_check_version_match +# Ensure that we are using m4 macros, and libtool script from the same +# release of libtool. +func_check_version_match () +{ + if test "$package_revision" != "$macro_revision"; then + if test "$VERSION" != "$macro_version"; then + if test -z "$macro_version"; then + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from an older release. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. 
+_LT_EOF + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from $PACKAGE $macro_version. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + fi + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision, +$progname: but the definition of this LT_INIT comes from revision $macro_revision. +$progname: You should recreate aclocal.m4 with macros from revision $package_revision +$progname: of $PACKAGE $VERSION and run autoconf again. +_LT_EOF + fi + + exit $EXIT_MISMATCH + fi +} + + +# Shorthand for --mode=foo, only valid as the first argument +case $1 in +clean|clea|cle|cl) + shift; set dummy --mode clean ${1+"$@"}; shift + ;; +compile|compil|compi|comp|com|co|c) + shift; set dummy --mode compile ${1+"$@"}; shift + ;; +execute|execut|execu|exec|exe|ex|e) + shift; set dummy --mode execute ${1+"$@"}; shift + ;; +finish|finis|fini|fin|fi|f) + shift; set dummy --mode finish ${1+"$@"}; shift + ;; +install|instal|insta|inst|ins|in|i) + shift; set dummy --mode install ${1+"$@"}; shift + ;; +link|lin|li|l) + shift; set dummy --mode link ${1+"$@"}; shift + ;; +uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u) + shift; set dummy --mode uninstall ${1+"$@"}; shift + ;; +esac + + + +# Option defaults: +opt_debug=: +opt_dry_run=false +opt_config=false +opt_preserve_dup_deps=false +opt_features=false +opt_finish=false +opt_help=false +opt_help_all=false +opt_silent=: +opt_warning=: +opt_verbose=: +opt_silent=false +opt_verbose=false + + +# Parse options once, thoroughly. This comes as soon as possible in the +# script to make things like `--version' happen as quickly as we can. 
+{ + # this just eases exit handling + while test $# -gt 0; do + opt="$1" + shift + case $opt in + --debug|-x) opt_debug='set -x' + func_echo "enabling shell trace mode" + $opt_debug + ;; + --dry-run|--dryrun|-n) + opt_dry_run=: + ;; + --config) + opt_config=: +func_config + ;; + --dlopen|-dlopen) + optarg="$1" + opt_dlopen="${opt_dlopen+$opt_dlopen +}$optarg" + shift + ;; + --preserve-dup-deps) + opt_preserve_dup_deps=: + ;; + --features) + opt_features=: +func_features + ;; + --finish) + opt_finish=: +set dummy --mode finish ${1+"$@"}; shift + ;; + --help) + opt_help=: + ;; + --help-all) + opt_help_all=: +opt_help=': help-all' + ;; + --mode) + test $# = 0 && func_missing_arg $opt && break + optarg="$1" + opt_mode="$optarg" +case $optarg in + # Valid mode arguments: + clean|compile|execute|finish|install|link|relink|uninstall) ;; + + # Catch anything else as an error + *) func_error "invalid argument for $opt" + exit_cmd=exit + break + ;; +esac + shift + ;; + --no-silent|--no-quiet) + opt_silent=false +func_append preserve_args " $opt" + ;; + --no-warning|--no-warn) + opt_warning=false +func_append preserve_args " $opt" + ;; + --no-verbose) + opt_verbose=false +func_append preserve_args " $opt" + ;; + --silent|--quiet) + opt_silent=: +func_append preserve_args " $opt" + opt_verbose=false + ;; + --verbose|-v) + opt_verbose=: +func_append preserve_args " $opt" +opt_silent=false + ;; + --tag) + test $# = 0 && func_missing_arg $opt && break + optarg="$1" + opt_tag="$optarg" +func_append preserve_args " $opt $optarg" +func_enable_tag "$optarg" + shift + ;; + + -\?|-h) func_usage ;; + --help) func_help ;; + --version) func_version ;; + + # Separate optargs to long options: + --*=*) + func_split_long_opt "$opt" + set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"} + shift + ;; + + # Separate non-argument short options: + -\?*|-h*|-n*|-v*) + func_split_short_opt "$opt" + set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"} + shift + ;; + + --) break ;; + -*) func_fatal_help "unrecognized option \`$opt'" ;; + *) set dummy "$opt" ${1+"$@"}; shift; break ;; + esac + done + + # Validate options: + + # save first non-option argument + if test "$#" -gt 0; then + nonopt="$opt" + shift + fi + + # preserve --debug + test "$opt_debug" = : || func_append preserve_args " --debug" + + case $host in + *cygwin* | *mingw* | *pw32* | *cegcc*) + # don't eliminate duplications in $postdeps and $predeps + opt_duplicate_compiler_generated_deps=: + ;; + *) + opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps + ;; + esac + + $opt_help || { + # Sanity checks first: + func_check_version_match + + if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then + func_fatal_configuration "not configured to build any kind of library" + fi + + # Darwin sucks + eval std_shrext=\"$shrext_cmds\" + + # Only execute mode is allowed to have -dlopen flags. + if test -n "$opt_dlopen" && test "$opt_mode" != execute; then + func_error "unrecognized option \`-dlopen'" + $ECHO "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # Change the help message to a mode-specific one. + generic_help="$help" + help="Try \`$progname --help --mode=$opt_mode' for more information." + } + + + # Bail if the options were screwed + $exit_cmd $EXIT_FAILURE +} + + + + +## ----------- ## +## Main. ## +## ----------- ## + +# func_lalib_p file +# True iff FILE is a libtool `.la' library or `.lo' object file. 
+# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_lalib_p () +{ + test -f "$1" && + $SED -e 4q "$1" 2>/dev/null \ + | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1 +} + +# func_lalib_unsafe_p file +# True iff FILE is a libtool `.la' library or `.lo' object file. +# This function implements the same check as func_lalib_p without +# resorting to external programs. To this end, it redirects stdin and +# closes it afterwards, without saving the original file descriptor. +# As a safety measure, use it only where a negative result would be +# fatal anyway. Works if `file' does not exist. +func_lalib_unsafe_p () +{ + lalib_p=no + if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then + for lalib_p_l in 1 2 3 4 + do + read lalib_p_line + case "$lalib_p_line" in + \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;; + esac + done + exec 0<&5 5<&- + fi + test "$lalib_p" = yes +} + +# func_ltwrapper_script_p file +# True iff FILE is a libtool wrapper script +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_script_p () +{ + func_lalib_p "$1" +} + +# func_ltwrapper_executable_p file +# True iff FILE is a libtool wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_executable_p () +{ + func_ltwrapper_exec_suffix= + case $1 in + *.exe) ;; + *) func_ltwrapper_exec_suffix=.exe ;; + esac + $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1 +} + +# func_ltwrapper_scriptname file +# Assumes file is an ltwrapper_executable +# uses $file to determine the appropriate filename for a +# temporary ltwrapper_script. +func_ltwrapper_scriptname () +{ + func_dirname_and_basename "$1" "" "." + func_stripname '' '.exe' "$func_basename_result" + func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper" +} + +# func_ltwrapper_p file +# True iff FILE is a libtool wrapper script or wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_p () +{ + func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1" +} + + +# func_execute_cmds commands fail_cmd +# Execute tilde-delimited COMMANDS. +# If FAIL_CMD is given, eval that upon failure. +# FAIL_CMD may read-access the current command in variable CMD! +func_execute_cmds () +{ + $opt_debug + save_ifs=$IFS; IFS='~' + for cmd in $1; do + IFS=$save_ifs + eval cmd=\"$cmd\" + func_show_eval "$cmd" "${2-:}" + done + IFS=$save_ifs +} + + +# func_source file +# Source FILE, adding directory component if necessary. +# Note that it is not necessary on cygwin/mingw to append a dot to +# FILE even if both FILE and FILE.exe exist: automatic-append-.exe +# behavior happens only for exec(3), not for open(2)! Also, sourcing +# `FILE.' does not work on cygwin managed mounts. +func_source () +{ + $opt_debug + case $1 in + */* | *\\*) . "$1" ;; + *) . "./$1" ;; + esac +} + + +# func_resolve_sysroot PATH +# Replace a leading = in PATH with a sysroot. 
Store the result into +# func_resolve_sysroot_result +func_resolve_sysroot () +{ + func_resolve_sysroot_result=$1 + case $func_resolve_sysroot_result in + =*) + func_stripname '=' '' "$func_resolve_sysroot_result" + func_resolve_sysroot_result=$lt_sysroot$func_stripname_result + ;; + esac +} + +# func_replace_sysroot PATH +# If PATH begins with the sysroot, replace it with = and +# store the result into func_replace_sysroot_result. +func_replace_sysroot () +{ + case "$lt_sysroot:$1" in + ?*:"$lt_sysroot"*) + func_stripname "$lt_sysroot" '' "$1" + func_replace_sysroot_result="=$func_stripname_result" + ;; + *) + # Including no sysroot. + func_replace_sysroot_result=$1 + ;; + esac +} + +# func_infer_tag arg +# Infer tagged configuration to use if any are available and +# if one wasn't chosen via the "--tag" command line option. +# Only attempt this if the compiler in the base compile +# command doesn't match the default compiler. +# arg is usually of the form 'gcc ...' +func_infer_tag () +{ + $opt_debug + if test -n "$available_tags" && test -z "$tagname"; then + CC_quoted= + for arg in $CC; do + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case $@ in + # Blanks in the command may have been stripped by the calling shell, + # but not from the CC environment variable when configure was run. + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; + # Blanks at the start of $base_compile will cause this to fail + # if we don't check for them as well. + *) + for z in $available_tags; do + if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then + # Evaluate the configuration. + eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" + CC_quoted= + for arg in $CC; do + # Double-quote args containing other shell metacharacters. + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case "$@ " in + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) + # The compiler in the base compile command matches + # the one in the tagged configuration. + # Assume this is the tagged configuration we want. + tagname=$z + break + ;; + esac + fi + done + # If $tagname still isn't set, then no tagged configuration + # was found and let the user know that the "--tag" command + # line option must be used. + if test -z "$tagname"; then + func_echo "unable to infer tagged configuration" + func_fatal_error "specify a tag with \`--tag'" +# else +# func_verbose "using $tagname tagged configuration" + fi + ;; + esac + fi +} + + + +# func_write_libtool_object output_name pic_name nonpic_name +# Create a libtool object file (analogous to a ".la" file), +# but don't create it if we're doing a dry run. +func_write_libtool_object () +{ + write_libobj=${1} + if test "$build_libtool_libs" = yes; then + write_lobj=\'${2}\' + else + write_lobj=none + fi + + if test "$build_old_libs" = yes; then + write_oldobj=\'${3}\' + else + write_oldobj=none + fi + + $opt_dry_run || { + cat >${write_libobj}T <<EOF +# $write_libobj - a libtool object file +# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION +# +# Please DO NOT delete this file! +# It is necessary for linking the library. 
+ +# Name of the PIC object. +pic_object=$write_lobj + +# Name of the non-PIC object +non_pic_object=$write_oldobj + +EOF + $MV "${write_libobj}T" "${write_libobj}" + } +} + + +################################################## +# FILE NAME AND PATH CONVERSION HELPER FUNCTIONS # +################################################## + +# func_convert_core_file_wine_to_w32 ARG +# Helper function used by file name conversion functions when $build is *nix, +# and $host is mingw, cygwin, or some other w32 environment. Relies on a +# correctly configured wine environment available, with the winepath program +# in $build's $PATH. +# +# ARG is the $build file name to be converted to w32 format. +# Result is available in $func_convert_core_file_wine_to_w32_result, and will +# be empty on error (or when ARG is empty) +func_convert_core_file_wine_to_w32 () +{ + $opt_debug + func_convert_core_file_wine_to_w32_result="$1" + if test -n "$1"; then + # Unfortunately, winepath does not exit with a non-zero error code, so we + # are forced to check the contents of stdout. On the other hand, if the + # command is not found, the shell will set an exit code of 127 and print + # *an error message* to stdout. So we must check for both error code of + # zero AND non-empty stdout, which explains the odd construction: + func_convert_core_file_wine_to_w32_tmp=`winepath -w "$1" 2>/dev/null` + if test "$?" -eq 0 && test -n "${func_convert_core_file_wine_to_w32_tmp}"; then + func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | + $SED -e "$lt_sed_naive_backslashify"` + else + func_convert_core_file_wine_to_w32_result= + fi + fi +} +# end: func_convert_core_file_wine_to_w32 + + +# func_convert_core_path_wine_to_w32 ARG +# Helper function used by path conversion functions when $build is *nix, and +# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly +# configured wine environment available, with the winepath program in $build's +# $PATH. Assumes ARG has no leading or trailing path separator characters. +# +# ARG is path to be converted from $build format to win32. +# Result is available in $func_convert_core_path_wine_to_w32_result. +# Unconvertible file (directory) names in ARG are skipped; if no directory names +# are convertible, then the result may be empty. +func_convert_core_path_wine_to_w32 () +{ + $opt_debug + # unfortunately, winepath doesn't convert paths, only file names + func_convert_core_path_wine_to_w32_result="" + if test -n "$1"; then + oldIFS=$IFS + IFS=: + for func_convert_core_path_wine_to_w32_f in $1; do + IFS=$oldIFS + func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" + if test -n "$func_convert_core_file_wine_to_w32_result" ; then + if test -z "$func_convert_core_path_wine_to_w32_result"; then + func_convert_core_path_wine_to_w32_result="$func_convert_core_file_wine_to_w32_result" + else + func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" + fi + fi + done + IFS=$oldIFS + fi +} +# end: func_convert_core_path_wine_to_w32 + + +# func_cygpath ARGS... +# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when +# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) +# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. 
In case (1) or +# (2), returns the Cygwin file name or path in func_cygpath_result (input +# file name or path is assumed to be in w32 format, as previously converted +# from $build's *nix or MSYS format). In case (3), returns the w32 file name +# or path in func_cygpath_result (input file name or path is assumed to be in +# Cygwin format). Returns an empty string on error. +# +# ARGS are passed to cygpath, with the last one being the file name or path to +# be converted. +# +# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH +# environment variable; do not put it in $PATH. +func_cygpath () +{ + $opt_debug + if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then + func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` + if test "$?" -ne 0; then + # on failure, ensure result is empty + func_cygpath_result= + fi + else + func_cygpath_result= + func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'" + fi +} +#end: func_cygpath + + +# func_convert_core_msys_to_w32 ARG +# Convert file name or path ARG from MSYS format to w32 format. Return +# result in func_convert_core_msys_to_w32_result. +func_convert_core_msys_to_w32 () +{ + $opt_debug + # awkward: cmd appends spaces to result + func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null | + $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"` +} +#end: func_convert_core_msys_to_w32 + + +# func_convert_file_check ARG1 ARG2 +# Verify that ARG1 (a file name in $build format) was converted to $host +# format in ARG2. Otherwise, emit an error message, but continue (resetting +# func_to_host_file_result to ARG1). +func_convert_file_check () +{ + $opt_debug + if test -z "$2" && test -n "$1" ; then + func_error "Could not determine host file name corresponding to" + func_error " \`$1'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback: + func_to_host_file_result="$1" + fi +} +# end func_convert_file_check + + +# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH +# Verify that FROM_PATH (a path in $build format) was converted to $host +# format in TO_PATH. Otherwise, emit an error message, but continue, resetting +# func_to_host_file_result to a simplistic fallback value (see below). +func_convert_path_check () +{ + $opt_debug + if test -z "$4" && test -n "$3"; then + func_error "Could not determine the host path corresponding to" + func_error " \`$3'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback. This is a deliberately simplistic "conversion" and + # should not be "improved". See libtool.info. + if test "x$1" != "x$2"; then + lt_replace_pathsep_chars="s|$1|$2|g" + func_to_host_path_result=`echo "$3" | + $SED -e "$lt_replace_pathsep_chars"` + else + func_to_host_path_result="$3" + fi + fi +} +# end func_convert_path_check + + +# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG +# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT +# and appending REPL if ORIG matches BACKPAT. 
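+# Illustrative sketch (hypothetical values): with FRONTPAT=":*", BACKPAT="*:",
+# REPL=";" and ORIG=":/foo/bar", the leading ":" in ORIG matches FRONTPAT, so
+# ";" is prepended to func_to_host_path_result; a trailing ":" in ORIG would
+# likewise cause ";" to be appended.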
+func_convert_path_front_back_pathsep () +{ + $opt_debug + case $4 in + $1 ) func_to_host_path_result="$3$func_to_host_path_result" + ;; + esac + case $4 in + $2 ) func_append func_to_host_path_result "$3" + ;; + esac +} +# end func_convert_path_front_back_pathsep + + +################################################## +# $build to $host FILE NAME CONVERSION FUNCTIONS # +################################################## +# invoked via `$to_host_file_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# Result will be available in $func_to_host_file_result. + + +# func_to_host_file ARG +# Converts the file name ARG from $build format to $host format. Return result +# in func_to_host_file_result. +func_to_host_file () +{ + $opt_debug + $to_host_file_cmd "$1" +} +# end func_to_host_file + + +# func_to_tool_file ARG LAZY +# converts the file name ARG from $build format to toolchain format. Return +# result in func_to_tool_file_result. If the conversion in use is listed +# in (the comma separated) LAZY, no conversion takes place. +func_to_tool_file () +{ + $opt_debug + case ,$2, in + *,"$to_tool_file_cmd",*) + func_to_tool_file_result=$1 + ;; + *) + $to_tool_file_cmd "$1" + func_to_tool_file_result=$func_to_host_file_result + ;; + esac +} +# end func_to_tool_file + + +# func_convert_file_noop ARG +# Copy ARG to func_to_host_file_result. +func_convert_file_noop () +{ + func_to_host_file_result="$1" +} +# end func_convert_file_noop + + +# func_convert_file_msys_to_w32 ARG +# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_file_result. +func_convert_file_msys_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_to_host_file_result="$func_convert_core_msys_to_w32_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_w32 + + +# func_convert_file_cygwin_to_w32 ARG +# Convert file name ARG from Cygwin to w32 format. Returns result in +# func_to_host_file_result. +func_convert_file_cygwin_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + # because $build is cygwin, we call "the" cygpath in $PATH; no need to use + # LT_CYGPATH in this case. + func_to_host_file_result=`cygpath -m "$1"` + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_cygwin_to_w32 + + +# func_convert_file_nix_to_w32 ARG +# Convert file name ARG from *nix to w32 format. Requires a wine environment +# and a working winepath. Returns result in func_to_host_file_result. +func_convert_file_nix_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_file_wine_to_w32 "$1" + func_to_host_file_result="$func_convert_core_file_wine_to_w32_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_w32 + + +# func_convert_file_msys_to_cygwin ARG +# Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_file_result. 
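+# Illustrative sketch (hypothetical paths): an MSYS name such as /c/src/foo.la
+# is first rewritten to w32 form (c:\src\foo.la) and then mapped by cygpath -u
+# to a Cygwin name, typically /cygdrive/c/src/foo.la.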
+func_convert_file_msys_to_cygwin () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_cygpath -u "$func_convert_core_msys_to_w32_result" + func_to_host_file_result="$func_cygpath_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_cygwin + + +# func_convert_file_nix_to_cygwin ARG +# Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed +# in a wine environment, working winepath, and LT_CYGPATH set. Returns result +# in func_to_host_file_result. +func_convert_file_nix_to_cygwin () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + # convert from *nix to w32, then use cygpath to convert from w32 to cygwin. + func_convert_core_file_wine_to_w32 "$1" + func_cygpath -u "$func_convert_core_file_wine_to_w32_result" + func_to_host_file_result="$func_cygpath_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_cygwin + + +############################################# +# $build to $host PATH CONVERSION FUNCTIONS # +############################################# +# invoked via `$to_host_path_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# The result will be available in $func_to_host_path_result. +# +# Path separators are also converted from $build format to $host format. If +# ARG begins or ends with a path separator character, it is preserved (but +# converted to $host format) on output. +# +# All path conversion functions are named using the following convention: +# file name conversion function : func_convert_file_X_to_Y () +# path conversion function : func_convert_path_X_to_Y () +# where, for any given $build/$host combination the 'X_to_Y' value is the +# same. If conversion functions are added for new $build/$host combinations, +# the two new functions must follow this pattern, or func_init_to_host_path_cmd +# will break. + + +# func_init_to_host_path_cmd +# Ensures that function "pointer" variable $to_host_path_cmd is set to the +# appropriate value, based on the value of $to_host_file_cmd. +to_host_path_cmd= +func_init_to_host_path_cmd () +{ + $opt_debug + if test -z "$to_host_path_cmd"; then + func_stripname 'func_convert_file_' '' "$to_host_file_cmd" + to_host_path_cmd="func_convert_path_${func_stripname_result}" + fi +} + + +# func_to_host_path ARG +# Converts the path ARG from $build format to $host format. Return result +# in func_to_host_path_result. +func_to_host_path () +{ + $opt_debug + func_init_to_host_path_cmd + $to_host_path_cmd "$1" +} +# end func_to_host_path + + +# func_convert_path_noop ARG +# Copy ARG to func_to_host_path_result. +func_convert_path_noop () +{ + func_to_host_path_result="$1" +} +# end func_convert_path_noop + + +# func_convert_path_msys_to_w32 ARG +# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_path_result. +func_convert_path_msys_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # Remove leading and trailing path separator characters from ARG. MSYS + # behavior is inconsistent here; cygpath turns them into '.;' and ';.'; + # and winepath ignores them completely. 
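+ # Illustrative sketch (hypothetical value): for ARG=":/c/foo:/c/bar:" the
+ # stripname call below yields "/c/foo:/c/bar"; after conversion, the stripped
+ # leading and trailing separators are restored in ";" form by
+ # func_convert_path_front_back_pathsep at the end of this function.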
+ func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result="$func_convert_core_msys_to_w32_result" + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_msys_to_w32 + + +# func_convert_path_cygwin_to_w32 ARG +# Convert path ARG from Cygwin to w32 format. Returns result in +# func_to_host_file_result. +func_convert_path_cygwin_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"` + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_cygwin_to_w32 + + +# func_convert_path_nix_to_w32 ARG +# Convert path ARG from *nix to w32 format. Requires a wine environment and +# a working winepath. Returns result in func_to_host_file_result. +func_convert_path_nix_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result="$func_convert_core_path_wine_to_w32_result" + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_nix_to_w32 + + +# func_convert_path_msys_to_cygwin ARG +# Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_file_result. +func_convert_path_msys_to_cygwin () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_msys_to_w32_result" + func_to_host_path_result="$func_cygpath_result" + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_msys_to_cygwin + + +# func_convert_path_nix_to_cygwin ARG +# Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a +# a wine environment, working winepath, and LT_CYGPATH set. Returns result in +# func_to_host_file_result. +func_convert_path_nix_to_cygwin () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # Remove leading and trailing path separator characters from + # ARG. msys behavior is inconsistent here, cygpath turns them + # into '.;' and ';.', and winepath ignores them completely. + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result" + func_to_host_path_result="$func_cygpath_result" + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_nix_to_cygwin + + +# func_mode_compile arg... 
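+# Illustrative sketch of a typical invocation (hypothetical file names):
+#   libtool --mode=compile gcc -Wall -c foo.c
+# produces the libtool object foo.lo plus the PIC object $objdir/foo.o
+# (and, when static libraries are enabled, a non-PIC foo.o as well).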
+func_mode_compile () +{ + $opt_debug + # Get the compilation command and the source file. + base_compile= + srcfile="$nonopt" # always keep a non-empty value in "srcfile" + suppress_opt=yes + suppress_output= + arg_mode=normal + libobj= + later= + pie_flag= + + for arg + do + case $arg_mode in + arg ) + # do not "continue". Instead, add this to base_compile + lastarg="$arg" + arg_mode=normal + ;; + + target ) + libobj="$arg" + arg_mode=normal + continue + ;; + + normal ) + # Accept any command-line options. + case $arg in + -o) + test -n "$libobj" && \ + func_fatal_error "you cannot specify \`-o' more than once" + arg_mode=target + continue + ;; + + -pie | -fpie | -fPIE) + func_append pie_flag " $arg" + continue + ;; + + -shared | -static | -prefer-pic | -prefer-non-pic) + func_append later " $arg" + continue + ;; + + -no-suppress) + suppress_opt=no + continue + ;; + + -Xcompiler) + arg_mode=arg # the next one goes into the "base_compile" arg list + continue # The current "srcfile" will either be retained or + ;; # replaced later. I would guess that would be a bug. + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + lastarg= + save_ifs="$IFS"; IFS=',' + for arg in $args; do + IFS="$save_ifs" + func_append_quoted lastarg "$arg" + done + IFS="$save_ifs" + func_stripname ' ' '' "$lastarg" + lastarg=$func_stripname_result + + # Add the arguments to base_compile. + func_append base_compile " $lastarg" + continue + ;; + + *) + # Accept the current argument as the source file. + # The previous "srcfile" becomes the current argument. + # + lastarg="$srcfile" + srcfile="$arg" + ;; + esac # case $arg + ;; + esac # case $arg_mode + + # Aesthetically quote the previous argument. + func_append_quoted base_compile "$lastarg" + done # for arg + + case $arg_mode in + arg) + func_fatal_error "you must specify an argument for -Xcompile" + ;; + target) + func_fatal_error "you must specify a target with \`-o'" + ;; + *) + # Get the name of the library object. + test -z "$libobj" && { + func_basename "$srcfile" + libobj="$func_basename_result" + } + ;; + esac + + # Recognize several different file suffixes. + # If the user specifies -o file.o, it is replaced with file.lo + case $libobj in + *.[cCFSifmso] | \ + *.ada | *.adb | *.ads | *.asm | \ + *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ + *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup) + func_xform "$libobj" + libobj=$func_xform_result + ;; + esac + + case $libobj in + *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; + *) + func_fatal_error "cannot determine name of library object from \`$libobj'" + ;; + esac + + func_infer_tag $base_compile + + for arg in $later; do + case $arg in + -shared) + test "$build_libtool_libs" != yes && \ + func_fatal_configuration "can not build a shared library" + build_old_libs=no + continue + ;; + + -static) + build_libtool_libs=no + build_old_libs=yes + continue + ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + esac + done + + func_quote_for_eval "$libobj" + test "X$libobj" != "X$func_quote_for_eval_result" \ + && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ + && func_warning "libobj name \`$libobj' may not contain shell special characters." 
+ func_dirname_and_basename "$obj" "/" "" + objname="$func_basename_result" + xdir="$func_dirname_result" + lobj=${xdir}$objdir/$objname + + test -z "$base_compile" && \ + func_fatal_help "you must specify a compilation command" + + # Delete any leftover library objects. + if test "$build_old_libs" = yes; then + removelist="$obj $lobj $libobj ${libobj}T" + else + removelist="$lobj $libobj ${libobj}T" + fi + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | pw32* | os2* | cegcc*) + pic_mode=default + ;; + esac + if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test "$compiler_c_o" = no; then + output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.${objext} + lockfile="$output_obj.lock" + else + output_obj= + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test "$need_locks" = yes; then + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + elif test "$need_locks" = warn; then + if test -f "$lockfile"; then + $ECHO "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + func_append removelist " $output_obj" + $ECHO "$srcfile" > "$lockfile" + fi + + $opt_dry_run || $RM $removelist + func_append removelist " $lockfile" + trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 + + func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 + srcfile=$func_to_tool_file_result + func_quote_for_eval "$srcfile" + qsrcfile=$func_quote_for_eval_result + + # Only build a PIC object if we are building libtool libraries. + if test "$build_libtool_libs" = yes; then + # Without this assignment, base_compile gets emptied. + fbsd_hideous_sh_bug=$base_compile + + if test "$pic_mode" != no; then + command="$base_compile $qsrcfile $pic_flag" + else + # Don't build PIC code + command="$base_compile $qsrcfile" + fi + + func_mkdir_p "$xdir$objdir" + + if test -z "$output_obj"; then + # Place PIC objects in $objdir + func_append command " -o $lobj" + fi + + func_show_eval_locale "$command" \ + 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' + + if test "$need_locks" = warn && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." 
+ + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed, then go on to compile the next one + if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then + func_show_eval '$MV "$output_obj" "$lobj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + + # Allow error messages only from the first compilation. + if test "$suppress_opt" = yes; then + suppress_output=' >/dev/null 2>&1' + fi + fi + + # Only build a position-dependent object if we build old libraries. + if test "$build_old_libs" = yes; then + if test "$pic_mode" != yes; then + # Don't build PIC code + command="$base_compile $qsrcfile$pie_flag" + else + command="$base_compile $qsrcfile $pic_flag" + fi + if test "$compiler_c_o" = yes; then + func_append command " -o $obj" + fi + + # Suppress compiler output if we already did a PIC compilation. + func_append command "$suppress_output" + func_show_eval_locale "$command" \ + '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' + + if test "$need_locks" = warn && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed + if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then + func_show_eval '$MV "$output_obj" "$obj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + fi + + $opt_dry_run || { + func_write_libtool_object "$libobj" "$objdir/$objname" "$objname" + + # Unlock the critical section if it was locked + if test "$need_locks" != no; then + removelist=$lockfile + $RM "$lockfile" + fi + } + + exit $EXIT_SUCCESS +} + +$opt_help || { + test "$opt_mode" = compile && func_mode_compile ${1+"$@"} +} + +func_mode_help () +{ + # We need to display help for each of the modes. + case $opt_mode in + "") + # Generic help is extracted from the usage comments + # at the start of this file. + func_help + ;; + + clean) + $ECHO \ +"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + + compile) + $ECHO \ +"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -no-suppress do not suppress compiler output for multiple passes + -prefer-pic try to build PIC objects only + -prefer-non-pic try to build non-PIC objects only + -shared do not build a \`.o' file suitable for static linking + -static only build a \`.o' file suitable for static linking + -Wc,FLAG pass FLAG directly to the compiler + +COMPILE-COMMAND is a command to be used in creating a \`standard' object file +from the given SOURCEFILE. 
+ +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix \`.c' with the +library object suffix, \`.lo'." + ;; + + execute) + $ECHO \ +"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to \`-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." + ;; + + finish) + $ECHO \ +"Usage: $progname [OPTION]... --mode=finish [LIBDIR]... + +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the \`--dry-run' option if you just want to see what would be executed." + ;; + + install) + $ECHO \ +"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the \`install' or \`cp' program. + +The following components of INSTALL-COMMAND are treated specially: + + -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation + +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + + link) + $ECHO \ +"Usage: $progname [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. 
+ +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -bindir BINDIR specify path to binaries directory (for systems where + libraries must be found in the PATH setting at runtime) + -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -objectlist FILE Use a list of object files found in FILE to specify objects + -precious-files-regex REGEX + don't remove output files matching REGEX + -release RELEASE specify package release information + -rpath LIBDIR the created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -shared only do dynamic linking of libtool libraries + -shrext SUFFIX override the standard shared library file extension + -static do not do any dynamic linking of uninstalled libtool libraries + -static-libtool-libs + do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + -weak LIBNAME declare that the target provides the LIBNAME interface + -Wc,FLAG + -Xcompiler FLAG pass linker-specific FLAG directly to the compiler + -Wl,FLAG + -Xlinker FLAG pass linker-specific FLAG directly to the linker + -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) + +All other options (arguments beginning with \`-') are ignored. + +Every other argument is treated as a filename. Files ending in \`.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in \`.la', then a libtool library is created, +only library objects (\`.lo' files) may be specified, and \`-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created +using \`ar' and \`ranlib', or on Windows using \`lib'. + +If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file +is created, otherwise an executable program is created." + ;; + + uninstall) + $ECHO \ +"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + + *) + func_fatal_help "invalid operation mode \`$opt_mode'" + ;; + esac + + echo + $ECHO "Try \`$progname --help' for more information about other modes." 
+} + +# Now that we've collected a possible --mode arg, show help if necessary +if $opt_help; then + if test "$opt_help" = :; then + func_mode_help + else + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + func_mode_help + done + } | sed -n '1p; 2,$s/^Usage:/ or: /p' + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + echo + func_mode_help + done + } | + sed '1d + /^When reporting/,/^Report/{ + H + d + } + $x + /information about other modes/d + /more detailed .*MODE/d + s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/' + fi + exit $? +fi + + +# func_mode_execute arg... +func_mode_execute () +{ + $opt_debug + # The first argument is the command name. + cmd="$nonopt" + test -z "$cmd" && \ + func_fatal_help "you must specify a COMMAND" + + # Handle -dlopen flags immediately. + for file in $opt_dlopen; do + test -f "$file" \ + || func_fatal_help "\`$file' is not a file" + + dir= + case $file in + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "\`$lib' is not a valid libtool archive" + + # Read the libtool library. + dlname= + library_names= + func_source "$file" + + # Skip this library if it cannot be dlopened. + if test -z "$dlname"; then + # Warn if it was a shared library. + test -n "$library_names" && \ + func_warning "\`$file' was not linked with \`-export-dynamic'" + continue + fi + + func_dirname "$file" "" "." + dir="$func_dirname_result" + + if test -f "$dir/$objdir/$dlname"; then + func_append dir "/$objdir" + else + if test ! -f "$dir/$dlname"; then + func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" + fi + fi + ;; + + *.lo) + # Just add the directory containing the .lo file. + func_dirname "$file" "" "." + dir="$func_dirname_result" + ;; + + *) + func_warning "\`-dlopen' is ignored for non-libtool libraries and objects" + continue + ;; + esac + + # Get the absolute pathname. + absdir=`cd "$dir" && pwd` + test -n "$absdir" && dir="$absdir" + + # Now add the directory to shlibpath_var. + if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic="$magic" + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case $file in + -* | *.la | *.lo ) ;; + *) + # Do a test to see if this is really a libtool program. + if func_ltwrapper_script_p "$file"; then + func_source "$file" + # Transform arg to wrapped name. + file="$progdir/$program" + elif func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + func_source "$func_ltwrapper_scriptname_result" + # Transform arg to wrapped name. + file="$progdir/$program" + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + func_append_quoted args "$file" + done + + if test "X$opt_dry_run" = Xfalse; then + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved environment variables + for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES + do + eval "if test \"\${save_$lt_var+set}\" = set; then + $lt_var=\$save_$lt_var; export $lt_var + else + $lt_unset $lt_var + fi" + done + + # Now prepare to actually exec the command. 
+ exec_cmd="\$cmd$args" + else + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\"" + echo "export $shlibpath_var" + fi + $ECHO "$cmd$args" + exit $EXIT_SUCCESS + fi +} + +test "$opt_mode" = execute && func_mode_execute ${1+"$@"} + + +# func_mode_finish arg... +func_mode_finish () +{ + $opt_debug + libs= + libdirs= + admincmds= + + for opt in "$nonopt" ${1+"$@"} + do + if test -d "$opt"; then + func_append libdirs " $opt" + + elif test -f "$opt"; then + if func_lalib_unsafe_p "$opt"; then + func_append libs " $opt" + else + func_warning "\`$opt' is not a valid libtool archive" + fi + + else + func_fatal_error "invalid argument \`$opt'" + fi + done + + if test -n "$libs"; then + if test -n "$lt_sysroot"; then + sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"` + sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;" + else + sysroot_cmd= + fi + + # Remove sysroot references + if $opt_dry_run; then + for lib in $libs; do + echo "removing references to $lt_sysroot and \`=' prefixes from $lib" + done + else + tmpdir=`func_mktempdir` + for lib in $libs; do + sed -e "${sysroot_cmd} s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \ + > $tmpdir/tmp-la + mv -f $tmpdir/tmp-la $lib + done + ${RM}r "$tmpdir" + fi + fi + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + func_execute_cmds "$finish_cmds" 'admincmds="$admincmds +'"$cmd"'"' + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. + eval cmds=\"$finish_eval\" + $opt_dry_run || eval "$cmds" || func_append admincmds " + $cmds" + fi + done + fi + + # Exit here if they wanted silent mode. + $opt_silent && exit $EXIT_SUCCESS + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + echo "----------------------------------------------------------------------" + echo "Libraries have been installed in:" + for libdir in $libdirs; do + $ECHO " $libdir" + done + echo + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use the \`-LLIBDIR'" + echo "flag during linking and do at least one of the following:" + if test -n "$shlibpath_var"; then + echo " - add LIBDIR to the \`$shlibpath_var' environment variable" + echo " during execution" + fi + if test -n "$runpath_var"; then + echo " - add LIBDIR to the \`$runpath_var' environment variable" + echo " during linking" + fi + if test -n "$hardcode_libdir_flag_spec"; then + libdir=LIBDIR + eval flag=\"$hardcode_libdir_flag_spec\" + + $ECHO " - use the \`$flag' linker flag" + fi + if test -n "$admincmds"; then + $ECHO " - have your system administrator run these commands:$admincmds" + fi + if test -f /etc/ld.so.conf; then + echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" + fi + echo + + echo "See any operating system documentation about shared libraries for" + case $host in + solaris2.[6789]|solaris2.1[0-9]) + echo "more information, such as the ld(1), crle(1) and ld.so(8) manual" + echo "pages." + ;; + *) + echo "more information, such as the ld(1) and ld.so(8) manual pages." + ;; + esac + echo "----------------------------------------------------------------------" + fi + exit $EXIT_SUCCESS +} + +test "$opt_mode" = finish && func_mode_finish ${1+"$@"} + + +# func_mode_install arg... 
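+# Illustrative sketch of a typical invocation (hypothetical paths):
+#   libtool --mode=install /usr/bin/install -c libfoo.la /usr/local/lib/libfoo.la
+# which installs the shared library and its symlinks, the .la file and, when
+# present, the static archive; library directories that still need
+# post-install processing are then handled via --mode=finish.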
+func_mode_install () +{ + $opt_debug + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT). + if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || + # Allow the use of GNU shtool's install command. + case $nonopt in *shtool*) :;; *) false;; esac; then + # Aesthetically quote it. + func_quote_for_eval "$nonopt" + install_prog="$func_quote_for_eval_result " + arg=$1 + shift + else + install_prog= + arg=$nonopt + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + func_quote_for_eval "$arg" + func_append install_prog "$func_quote_for_eval_result" + install_shared_prog=$install_prog + case " $install_prog " in + *[\\\ /]cp\ *) install_cp=: ;; + *) install_cp=false ;; + esac + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=no + stripme= + no_mode=: + for arg + do + arg2= + if test -n "$dest"; then + func_append files " $dest" + dest=$arg + continue + fi + + case $arg in + -d) isdir=yes ;; + -f) + if $install_cp; then :; else + prev=$arg + fi + ;; + -g | -m | -o) + prev=$arg + ;; + -s) + stripme=" -s" + continue + ;; + -*) + ;; + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + if test "x$prev" = x-m && test -n "$install_override_mode"; then + arg2=$install_override_mode + no_mode=false + fi + prev= + else + dest=$arg + continue + fi + ;; + esac + + # Aesthetically quote the argument. + func_quote_for_eval "$arg" + func_append install_prog " $func_quote_for_eval_result" + if test -n "$arg2"; then + func_quote_for_eval "$arg2" + fi + func_append install_shared_prog " $func_quote_for_eval_result" + done + + test -z "$install_prog" && \ + func_fatal_help "you must specify an install program" + + test -n "$prev" && \ + func_fatal_help "the \`$prev' option requires an argument" + + if test -n "$install_override_mode" && $no_mode; then + if $install_cp; then :; else + func_quote_for_eval "$install_override_mode" + func_append install_shared_prog " -m $func_quote_for_eval_result" + fi + fi + + if test -z "$files"; then + if test -z "$dest"; then + func_fatal_help "no file or destination specified" + else + func_fatal_help "you must specify a destination" + fi + fi + + # Strip any trailing slash from the destination. + func_stripname '' '/' "$dest" + dest=$func_stripname_result + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=yes + if test "$isdir" = yes; then + destdir="$dest" + destname= + else + func_dirname_and_basename "$dest" "" "." + destdir="$func_dirname_result" + destname="$func_basename_result" + + # Not a directory, so check to see that there is only one file specified. + set dummy $files; shift + test "$#" -gt 1 && \ + func_fatal_help "\`$dest' is not a directory" + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + func_fatal_help "\`$destdir' must be an absolute directory name" + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. 
+ func_append staticlibs " $file" + ;; + + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "\`$file' is not a valid libtool archive" + + library_names= + old_library= + relink_command= + func_source "$file" + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) func_append current_libdirs " $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) func_append future_libdirs " $libdir" ;; + esac + fi + + func_dirname "$file" "/" "" + dir="$func_dirname_result" + func_append dir "$objdir" + + if test -n "$relink_command"; then + # Determine the prefix the user has applied to our future dir. + inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` + + # Don't allow the user to place us outside of our expected + # location b/c this prevents finding dependent libraries that + # are installed to the same prefix. + # At present, this check doesn't affect windows .dll's that + # are installed into $libdir/../bin (currently, that works fine) + # but it's something to keep an eye on. + test "$inst_prefix_dir" = "$destdir" && \ + func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir" + + if test -n "$inst_prefix_dir"; then + # Stick the inst_prefix_dir data into the link command. + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` + else + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` + fi + + func_warning "relinking \`$file'" + func_show_eval "$relink_command" \ + 'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"' + fi + + # See the names of the shared library. + set dummy $library_names; shift + if test -n "$1"; then + realname="$1" + shift + + srcname="$realname" + test -n "$relink_command" && srcname="$realname"T + + # Install the shared library and build the symlinks. + func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ + 'exit $?' + tstripme="$stripme" + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + case $realname in + *.dll.a) + tstripme="" + ;; + esac + ;; + esac + if test -n "$tstripme" && test -n "$striplib"; then + func_show_eval "$striplib $destdir/$realname" 'exit $?' + fi + + if test "$#" -gt 0; then + # Delete the old symlinks, and create new ones. + # Try `ln -sf' first, because the `ln' binary might depend on + # the symlink we replace! Solaris /bin/ln does not understand -f, + # so we also need to try rm && ln -s. + for linkname + do + test "$linkname" != "$realname" \ + && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" + done + fi + + # Do each command in the postinstall commands. + lib="$destdir/$realname" + func_execute_cmds "$postinstall_cmds" 'exit $?' + fi + + # Install the pseudo-library for information purposes. + func_basename "$file" + name="$func_basename_result" + instname="$dir/$name"i + func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' + + # Maybe install the static library, too. + test -n "$old_library" && func_append staticlibs " $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. 
+ if test -n "$destname"; then + destfile="$destdir/$destname" + else + func_basename "$file" + destfile="$func_basename_result" + destfile="$destdir/$destfile" + fi + + # Deduce the name of the destination old-style object file. + case $destfile in + *.lo) + func_lo2o "$destfile" + staticdest=$func_lo2o_result + ;; + *.$objext) + staticdest="$destfile" + destfile= + ;; + *) + func_fatal_help "cannot copy a libtool object to \`$destfile'" + ;; + esac + + # Install the libtool object if requested. + test -n "$destfile" && \ + func_show_eval "$install_prog $file $destfile" 'exit $?' + + # Install the old object if enabled. + if test "$build_old_libs" = yes; then + # Deduce the name of the old-style object file. + func_lo2o "$file" + staticobj=$func_lo2o_result + func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' + fi + exit $EXIT_SUCCESS + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + func_basename "$file" + destfile="$func_basename_result" + destfile="$destdir/$destfile" + fi + + # If the file is missing, and there is a .exe on the end, strip it + # because it is most likely a libtool script we actually want to + # install + stripped_ext="" + case $file in + *.exe) + if test ! -f "$file"; then + func_stripname '' '.exe' "$file" + file=$func_stripname_result + stripped_ext=".exe" + fi + ;; + esac + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin* | *mingw*) + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + wrapper=$func_ltwrapper_scriptname_result + else + func_stripname '' '.exe' "$file" + wrapper=$func_stripname_result + fi + ;; + *) + wrapper=$file + ;; + esac + if func_ltwrapper_script_p "$wrapper"; then + notinst_deplibs= + relink_command= + + func_source "$wrapper" + + # Check the variables that should have been set. + test -z "$generated_by_libtool_version" && \ + func_fatal_error "invalid libtool wrapper script \`$wrapper'" + + finalize=yes + for lib in $notinst_deplibs; do + # Check to see that each library is installed. + libdir= + if test -f "$lib"; then + func_source "$lib" + fi + libfile="$libdir/"`$ECHO "$lib" | $SED 's%^.*/%%g'` ### testsuite: skip nested quoting test + if test -n "$libdir" && test ! -f "$libfile"; then + func_warning "\`$lib' has not been installed in \`$libdir'" + finalize=no + fi + done + + relink_command= + func_source "$wrapper" + + outputname= + if test "$fast_install" = no && test -n "$relink_command"; then + $opt_dry_run || { + if test "$finalize" = yes; then + tmpdir=`func_mktempdir` + func_basename "$file$stripped_ext" + file="$func_basename_result" + outputname="$tmpdir/$file" + # Replace the output file specification. + relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` + + $opt_silent || { + func_quote_for_expand "$relink_command" + eval "func_echo $func_quote_for_expand_result" + } + if eval "$relink_command"; then : + else + func_error "error: relink \`$file' with the above command before installing it" + $opt_dry_run || ${RM}r "$tmpdir" + continue + fi + file="$outputname" + else + func_warning "cannot relink \`$file'" + fi + } + else + # Install the binary that we compiled earlier. 
+ file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyway + case $install_prog,$host in + */usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + func_stripname '' '.exe' "$destfile" + destfile=$func_stripname_result + ;; + esac + ;; + esac + func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' + $opt_dry_run || if test -n "$outputname"; then + ${RM}r "$tmpdir" + fi + ;; + esac + done + + for file in $staticlibs; do + func_basename "$file" + name="$func_basename_result" + + # Set up the ranlib parameters. + oldlib="$destdir/$name" + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + + func_show_eval "$install_prog \$file \$oldlib" 'exit $?' + + if test -n "$stripme" && test -n "$old_striplib"; then + func_show_eval "$old_striplib $tool_oldlib" 'exit $?' + fi + + # Do each command in the postinstall commands. + func_execute_cmds "$old_postinstall_cmds" 'exit $?' + done + + test -n "$future_libdirs" && \ + func_warning "remember to run \`$progname --finish$future_libdirs'" + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + $opt_dry_run && current_libdirs=" -n$current_libdirs" + exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs' + else + exit $EXIT_SUCCESS + fi +} + +test "$opt_mode" = install && func_mode_install ${1+"$@"} + + +# func_generate_dlsyms outputname originator pic_p +# Extract symbols from dlprefiles and create ${outputname}S.o with +# a dlpreopen symbol table. +func_generate_dlsyms () +{ + $opt_debug + my_outputname="$1" + my_originator="$2" + my_pic_p="${3-no}" + my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'` + my_dlsyms= + + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + my_dlsyms="${my_outputname}S.c" + else + func_error "not configured to extract global symbols from dlpreopened files" + fi + fi + + if test -n "$my_dlsyms"; then + case $my_dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist="$output_objdir/${my_outputname}.nm" + + func_show_eval "$RM $nlist ${nlist}S ${nlist}T" + + # Parse the name list into a source file. + func_verbose "creating $output_objdir/$my_dlsyms" + + $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ +/* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. */ +/* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +#if defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#endif + +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +/* External symbol declarations for the compiler. 
*/\ +" + + if test "$dlself" = yes; then + func_verbose "generating symbol list for \`$output'" + + $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. + progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` + for progfile in $progfiles; do + func_to_tool_file "$progfile" func_convert_file_msys_to_w32 + func_verbose "extracting global C symbols from \`$func_to_tool_file_result'" + $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $opt_dry_run || { + eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + if test -n "$export_symbols_regex"; then + $opt_dry_run || { + eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols="$output_objdir/$outputname.exp" + $opt_dry_run || { + $RM $export_symbols + eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' + ;; + esac + } + else + $opt_dry_run || { + eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' + eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' + ;; + esac + } + fi + fi + + for dlprefile in $dlprefiles; do + func_verbose "extracting global C symbols from \`$dlprefile'" + func_basename "$dlprefile" + name="$func_basename_result" + case $host in + *cygwin* | *mingw* | *cegcc* ) + # if an import library, we need to obtain dlname + if func_win32_import_lib_p "$dlprefile"; then + func_tr_sh "$dlprefile" + eval "curr_lafile=\$libfile_$func_tr_sh_result" + dlprefile_dlbasename="" + if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then + # Use subshell, to avoid clobbering current variable values + dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` + if test -n "$dlprefile_dlname" ; then + func_basename "$dlprefile_dlname" + dlprefile_dlbasename="$func_basename_result" + else + # no lafile. user explicitly requested -dlpreopen <import library>. 
+ $sharedlib_from_linklib_cmd "$dlprefile" + dlprefile_dlbasename=$sharedlib_from_linklib_result + fi + fi + $opt_dry_run || { + if test -n "$dlprefile_dlbasename" ; then + eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' + else + func_warning "Could not compute DLL name from $name" + eval '$ECHO ": $name " >> "$nlist"' + fi + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | + $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" + } + else # not an import lib + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + fi + ;; + *) + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + ;; + esac + done + + $opt_dry_run || { + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $MV "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. + if $GREP -v "^: " < "$nlist" | + if sort -k 3 </dev/null >/dev/null 2>&1; then + sort -k 3 + else + sort +2 + fi | + uniq > "$nlist"S; then + : + else + $GREP -v "^: " < "$nlist" > "$nlist"S + fi + + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$my_dlsyms" + fi + + echo >> "$output_objdir/$my_dlsyms" "\ + +/* The mapping between symbol names and symbols. */ +typedef struct { + const char *name; + void *address; +} lt_dlsymlist; +extern LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[]; +LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[] = +{\ + { \"$my_originator\", (void *) 0 }," + + case $need_lib_prefix in + no) + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + *) + eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + esac + echo >> "$output_objdir/$my_dlsyms" "\ + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_${my_prefix}_LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + } # !$opt_dry_run + + pic_flag_for_symtable= + case "$compile_command " in + *" -static "*) ;; + *) + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. + *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; + *-*-hpux*) + pic_flag_for_symtable=" $pic_flag" ;; + *) + if test "X$my_pic_p" != Xno; then + pic_flag_for_symtable=" $pic_flag" + fi + ;; + esac + ;; + esac + symtab_cflags= + for arg in $LTCFLAGS; do + case $arg in + -pie | -fpie | -fPIE) ;; + *) func_append symtab_cflags " $arg" ;; + esac + done + + # Now compile the dynamic symbol file. + func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' 
+ + # Clean up the generated files. + func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"' + + # Transform the symbol file into the correct name. + symfileobj="$output_objdir/${my_outputname}S.$objext" + case $host in + *cygwin* | *mingw* | *cegcc* ) + if test -f "$output_objdir/$my_outputname.def"; then + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + else + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + fi + ;; + *) + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + ;; + esac + ;; + *) + func_fatal_error "unknown suffix for \`$my_dlsyms'" + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. + compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` + fi +} + +# func_win32_libid arg +# return the library type of file 'arg' +# +# Need a lot of goo to handle *both* DLLs and import libs +# Has to be a shell function in order to 'eat' the argument +# that is supplied when $file_magic_command is called. +# Despite the name, also deal with 64 bit binaries. +func_win32_libid () +{ + $opt_debug + win32_libid_type="unknown" + win32_fileres=`file -L $1 2>/dev/null` + case $win32_fileres in + *ar\ archive\ import\ library*) # definitely import + win32_libid_type="x86 archive import" + ;; + *ar\ archive*) # could be an import, or static + # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD. + if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | + $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then + func_to_tool_file "$1" func_convert_file_msys_to_w32 + win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" | + $SED -n -e ' + 1,100{ + / I /{ + s,.*,import, + p + q + } + }'` + case $win32_nmres in + import*) win32_libid_type="x86 archive import";; + *) win32_libid_type="x86 archive static";; + esac + fi + ;; + *DLL*) + win32_libid_type="x86 DLL" + ;; + *executable*) # but shell scripts are "executable" too... + case $win32_fileres in + *MS\ Windows\ PE\ Intel*) + win32_libid_type="x86 DLL" + ;; + esac + ;; + esac + $ECHO "$win32_libid_type" +} + +# func_cygming_dll_for_implib ARG +# +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. +# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib () +{ + $opt_debug + sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` +} + +# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs +# +# The is the core of a fallback implementation of a +# platform-specific function to extract the name of the +# DLL associated with the specified import library LIBNAME. +# +# SECTION_NAME is either .idata$6 or .idata$7, depending +# on the platform and compiler that created the implib. 
+# +# Echos the name of the DLL associated with the +# specified import library. +func_cygming_dll_for_implib_fallback_core () +{ + $opt_debug + match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"` + $OBJDUMP -s --section "$1" "$2" 2>/dev/null | + $SED '/^Contents of section '"$match_literal"':/{ + # Place marker at beginning of archive member dllname section + s/.*/====MARK====/ + p + d + } + # These lines can sometimes be longer than 43 characters, but + # are always uninteresting + /:[ ]*file format pe[i]\{,1\}-/d + /^In archive [^:]*:/d + # Ensure marker is printed + /^====MARK====/p + # Remove all lines with less than 43 characters + /^.\{43\}/!d + # From remaining lines, remove first 43 characters + s/^.\{43\}//' | + $SED -n ' + # Join marker and all lines until next marker into a single line + /^====MARK====/ b para + H + $ b para + b + :para + x + s/\n//g + # Remove the marker + s/^====MARK====// + # Remove trailing dots and whitespace + s/[\. \t]*$// + # Print + /./p' | + # we now have a list, one entry per line, of the stringified + # contents of the appropriate section of all members of the + # archive which possess that section. Heuristic: eliminate + # all those which have a first or second character that is + # a '.' (that is, objdump's representation of an unprintable + # character.) This should work for all archives with less than + # 0x302f exports -- but will fail for DLLs whose name actually + # begins with a literal '.' or a single character followed by + # a '.'. + # + # Of those that remain, print the first one. + $SED -e '/^\./d;/^.\./d;q' +} + +# func_cygming_gnu_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is a GNU/binutils-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_gnu_implib_p () +{ + $opt_debug + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'` + test -n "$func_cygming_gnu_implib_tmp" +} + +# func_cygming_ms_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is an MS-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_ms_implib_p () +{ + $opt_debug + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'` + test -n "$func_cygming_ms_implib_tmp" +} + +# func_cygming_dll_for_implib_fallback ARG +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. +# +# This fallback implementation is for use when $DLLTOOL +# does not support the --identify-strict option. 
+# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib_fallback () +{ + $opt_debug + if func_cygming_gnu_implib_p "$1" ; then + # binutils import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"` + elif func_cygming_ms_implib_p "$1" ; then + # ms-generated import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"` + else + # unknown + sharedlib_from_linklib_result="" + fi +} + + +# func_extract_an_archive dir oldlib +func_extract_an_archive () +{ + $opt_debug + f_ex_an_ar_dir="$1"; shift + f_ex_an_ar_oldlib="$1" + if test "$lock_old_archive_extraction" = yes; then + lockfile=$f_ex_an_ar_oldlib.lock + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + fi + func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \ + 'stat=$?; rm -f "$lockfile"; exit $stat' + if test "$lock_old_archive_extraction" = yes; then + $opt_dry_run || rm -f "$lockfile" + fi + if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then + : + else + func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" + fi +} + + +# func_extract_archives gentop oldlib ... +func_extract_archives () +{ + $opt_debug + my_gentop="$1"; shift + my_oldlibs=${1+"$@"} + my_oldobjs="" + my_xlib="" + my_xabs="" + my_xdir="" + + for my_xlib in $my_oldlibs; do + # Extract the objects. + case $my_xlib in + [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;; + *) my_xabs=`pwd`"/$my_xlib" ;; + esac + func_basename "$my_xlib" + my_xlib="$func_basename_result" + my_xlib_u=$my_xlib + while :; do + case " $extracted_archives " in + *" $my_xlib_u "*) + func_arith $extracted_serial + 1 + extracted_serial=$func_arith_result + my_xlib_u=lt$extracted_serial-$my_xlib ;; + *) break ;; + esac + done + extracted_archives="$extracted_archives $my_xlib_u" + my_xdir="$my_gentop/$my_xlib_u" + + func_mkdir_p "$my_xdir" + + case $host in + *-darwin*) + func_verbose "Extracting $my_xabs" + # Do not bother doing anything if just a dry run + $opt_dry_run || { + darwin_orig_dir=`pwd` + cd $my_xdir || exit $? 
+ darwin_archive=$my_xabs + darwin_curdir=`pwd` + darwin_base_archive=`basename "$darwin_archive"` + darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true` + if test -n "$darwin_arches"; then + darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'` + darwin_arch= + func_verbose "$darwin_base_archive has multiple architectures $darwin_arches" + for darwin_arch in $darwin_arches ; do + func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}" + $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}" + cd "unfat-$$/${darwin_base_archive}-${darwin_arch}" + func_extract_an_archive "`pwd`" "${darwin_base_archive}" + cd "$darwin_curdir" + $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" + done # $darwin_arches + ## Okay now we've a bunch of thin objects, gotta fatten them up :) + darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u` + darwin_file= + darwin_files= + for darwin_file in $darwin_filelist; do + darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP` + $LIPO -create -output "$darwin_file" $darwin_files + done # $darwin_filelist + $RM -rf unfat-$$ + cd "$darwin_orig_dir" + else + cd $darwin_orig_dir + func_extract_an_archive "$my_xdir" "$my_xabs" + fi # $darwin_arches + } # !$opt_dry_run + ;; + *) + func_extract_an_archive "$my_xdir" "$my_xabs" + ;; + esac + my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP` + done + + func_extract_archives_result="$my_oldobjs" +} + + +# func_emit_wrapper [arg=no] +# +# Emit a libtool wrapper script on stdout. +# Don't directly open a file because we may want to +# incorporate the script contents within a cygwin/mingw +# wrapper executable. Must ONLY be called from within +# func_mode_link because it depends on a number of variables +# set therein. +# +# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR +# variable will take. If 'yes', then the emitted script +# will assume that the directory in which it is stored is +# the $objdir directory. This is a cygwin/mingw-specific +# behavior. +func_emit_wrapper () +{ + func_emit_wrapper_arg1=${1-no} + + $ECHO "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='$sed_quote_subst' + +# Be Bourne compatible +if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variables: + generated_by_libtool_version='$macro_version' + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$ECHO are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + file=\"\$0\"" + + qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"` + $ECHO "\ + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + ECHO=\"$qECHO\" + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ which is used only on +# windows platforms, and (c) all begin with the string "--lt-" +# (application programs are unlikely to have options which match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's $0 value, followed by "$@". +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=\$0 + shift + for lt_opt + do + case \"\$lt_opt\" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` + test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. + lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` + cat \"\$lt_dump_D/\$lt_dump_F\" + exit 0 + ;; + --lt-*) + \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n \"\$lt_option_debug\"; then + echo \"${outputname}:${output}:\${LINENO}: libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\" 1>&2 + fi +} + +# Used when --lt-debug. Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[\$lt_dump_args_N]: \$lt_arg\" + lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ +" + case $host in + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2* | *-cegcc*) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir\\\\\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} +" + ;; + + *) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir/\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir/\$program\" \${1+\"\$@\"} +" + ;; + esac + $ECHO "\ + \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from \$@ and +# launches target application with the remaining arguments. 
+func_exec_program () +{ + case \" \$* \" in + *\\ --lt-*) + for lt_wr_arg + do + case \$lt_wr_arg in + --lt-*) ;; + *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; + esac + shift + done ;; + esac + func_exec_program_core \${1+\"\$@\"} +} + + # Parse options + func_parse_lt_options \"\$0\" \${1+\"\$@\"} + + # Find the directory that this script lives in. + thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 + if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then + # special case for '.' + if test \"\$thisdir\" = \".\"; then + thisdir=\`pwd\` + fi + # remove .libs from thisdir + case \"\$thisdir\" in + *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; + $objdir ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test "$fast_install" = yes; then + $ECHO "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! -d \"\$progdir\"; then + $MKDIR \"\$progdir\" + else + $RM \"\$progdir/\$file\" + fi" + + $ECHO "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + $ECHO \"\$relink_command_output\" >&2 + $RM \"\$progdir/\$file\" + exit 1 + fi + fi + + $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $RM \"\$progdir/\$program\"; + $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $RM \"\$progdir/\$file\" + fi" + else + $ECHO "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + $ECHO "\ + + if test -f \"\$progdir/\$program\"; then" + + # fixup the dll searchpath if we need to. + # + # Fix the DLL searchpath if we need to. Do this before prepending + # to shlibpath, because on Windows, both are PATH and uninstalled + # libraries must come first. + if test -n "$dllsearchpath"; then + $ECHO "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + # Export our shlibpath_var if we have one. 
+ if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $ECHO "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` + + export $shlibpath_var +" + fi + + $ECHO "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. + func_exec_program \${1+\"\$@\"} + fi + else + # The program doesn't exist. + \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2 + \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 + \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" +} + + +# func_emit_cwrapperexe_src +# emit the source code for a wrapper executable on stdout +# Must ONLY be called from within func_mode_link because +# it depends on a number of variable set therein. +func_emit_cwrapperexe_src () +{ + cat <<EOF + +/* $cwrappersource - temporary wrapper executable for $objdir/$outputname + Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION + + The $output program cannot be directly executed until all the libtool + libraries that it depends on are installed. + + This wrapper executable should never be moved out of the build directory. + If it is, it will not operate correctly. +*/ +EOF + cat <<"EOF" +#ifdef _MSC_VER +# define _CRT_SECURE_NO_DEPRECATE 1 +#endif +#include <stdio.h> +#include <stdlib.h> +#ifdef _MSC_VER +# include <direct.h> +# include <process.h> +# include <io.h> +#else +# include <unistd.h> +# include <stdint.h> +# ifdef __CYGWIN__ +# include <io.h> +# endif +#endif +#include <malloc.h> +#include <stdarg.h> +#include <assert.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> + +/* declarations of non-ANSI functions */ +#if defined(__MINGW32__) +# ifdef __STRICT_ANSI__ +int _putenv (const char *); +# endif +#elif defined(__CYGWIN__) +# ifdef __STRICT_ANSI__ +char *realpath (const char *, char *); +int putenv (char *); +int setenv (const char *, const char *, int); +# endif +/* #elif defined (other platforms) ... */ +#endif + +/* portability defines, excluding path handling macros */ +#if defined(_MSC_VER) +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +# define S_IXUSR _S_IEXEC +# ifndef _INTPTR_T_DEFINED +# define _INTPTR_T_DEFINED +# define intptr_t int +# endif +#elif defined(__MINGW32__) +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +#elif defined(__CYGWIN__) +# define HAVE_SETENV +# define FOPEN_WB "wb" +/* #elif defined (other platforms) ... 
*/ +#endif + +#if defined(PATH_MAX) +# define LT_PATHMAX PATH_MAX +#elif defined(MAXPATHLEN) +# define LT_PATHMAX MAXPATHLEN +#else +# define LT_PATHMAX 1024 +#endif + +#ifndef S_IXOTH +# define S_IXOTH 0 +#endif +#ifndef S_IXGRP +# define S_IXGRP 0 +#endif + +/* path handling portability macros */ +#ifndef DIR_SEPARATOR +# define DIR_SEPARATOR '/' +# define PATH_SEPARATOR ':' +#endif + +#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \ + defined (__OS2__) +# define HAVE_DOS_BASED_FILE_SYSTEM +# define FOPEN_WB "wb" +# ifndef DIR_SEPARATOR_2 +# define DIR_SEPARATOR_2 '\\' +# endif +# ifndef PATH_SEPARATOR_2 +# define PATH_SEPARATOR_2 ';' +# endif +#endif + +#ifndef DIR_SEPARATOR_2 +# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) +#else /* DIR_SEPARATOR_2 */ +# define IS_DIR_SEPARATOR(ch) \ + (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) +#endif /* DIR_SEPARATOR_2 */ + +#ifndef PATH_SEPARATOR_2 +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) +#else /* PATH_SEPARATOR_2 */ +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) +#endif /* PATH_SEPARATOR_2 */ + +#ifndef FOPEN_WB +# define FOPEN_WB "w" +#endif +#ifndef _O_BINARY +# define _O_BINARY 0 +#endif + +#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) +#define XFREE(stale) do { \ + if (stale) { free ((void *) stale); stale = 0; } \ +} while (0) + +#if defined(LT_DEBUGWRAPPER) +static int lt_debug = 1; +#else +static int lt_debug = 0; +#endif + +const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ + +void *xmalloc (size_t num); +char *xstrdup (const char *string); +const char *base_name (const char *name); +char *find_executable (const char *wrapper); +char *chase_symlinks (const char *pathspec); +int make_executable (const char *path); +int check_executable (const char *path); +char *strendzap (char *str, const char *pat); +void lt_debugprintf (const char *file, int line, const char *fmt, ...); +void lt_fatal (const char *file, int line, const char *message, ...); +static const char *nonnull (const char *s); +static const char *nonempty (const char *s); +void lt_setenv (const char *name, const char *value); +char *lt_extend_str (const char *orig_value, const char *add, int to_end); +void lt_update_exe_path (const char *name, const char *value); +void lt_update_lib_path (const char *name, const char *value); +char **prepare_spawn (char **argv); +void lt_dump_script (FILE *f); +EOF + + cat <<EOF +volatile const char * MAGIC_EXE = "$magic_exe"; +const char * LIB_PATH_VARNAME = "$shlibpath_var"; +EOF + + if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + func_to_host_path "$temp_rpath" + cat <<EOF +const char * LIB_PATH_VALUE = "$func_to_host_path_result"; +EOF + else + cat <<"EOF" +const char * LIB_PATH_VALUE = ""; +EOF + fi + + if test -n "$dllsearchpath"; then + func_to_host_path "$dllsearchpath:" + cat <<EOF +const char * EXE_PATH_VARNAME = "PATH"; +const char * EXE_PATH_VALUE = "$func_to_host_path_result"; +EOF + else + cat <<"EOF" +const char * EXE_PATH_VARNAME = ""; +const char * EXE_PATH_VALUE = ""; +EOF + fi + + if test "$fast_install" = yes; then + cat <<EOF +const char * TARGET_PROGRAM_NAME = "lt-$outputname"; /* hopefully, no .exe */ +EOF + else + cat <<EOF +const char * TARGET_PROGRAM_NAME = "$outputname"; /* hopefully, no .exe */ +EOF + fi + + + cat <<"EOF" + +#define LTWRAPPER_OPTION_PREFIX "--lt-" + +static const char *ltwrapper_option_prefix = LTWRAPPER_OPTION_PREFIX; +static const 
char *dumpscript_opt = LTWRAPPER_OPTION_PREFIX "dump-script"; +static const char *debug_opt = LTWRAPPER_OPTION_PREFIX "debug"; + +int +main (int argc, char *argv[]) +{ + char **newargz; + int newargc; + char *tmp_pathspec; + char *actual_cwrapper_path; + char *actual_cwrapper_name; + char *target_name; + char *lt_argv_zero; + intptr_t rval = 127; + + int i; + + program_name = (char *) xstrdup (base_name (argv[0])); + newargz = XMALLOC (char *, argc + 1); + + /* very simple arg parsing; don't want to rely on getopt + * also, copy all non cwrapper options to newargz, except + * argz[0], which is handled differently + */ + newargc=0; + for (i = 1; i < argc; i++) + { + if (strcmp (argv[i], dumpscript_opt) == 0) + { +EOF + case "$host" in + *mingw* | *cygwin* ) + # make stdout use "unix" line endings + echo " setmode(1,_O_BINARY);" + ;; + esac + + cat <<"EOF" + lt_dump_script (stdout); + return 0; + } + if (strcmp (argv[i], debug_opt) == 0) + { + lt_debug = 1; + continue; + } + if (strcmp (argv[i], ltwrapper_option_prefix) == 0) + { + /* however, if there is an option in the LTWRAPPER_OPTION_PREFIX + namespace, but it is not one of the ones we know about and + have already dealt with, above (inluding dump-script), then + report an error. Otherwise, targets might begin to believe + they are allowed to use options in the LTWRAPPER_OPTION_PREFIX + namespace. The first time any user complains about this, we'll + need to make LTWRAPPER_OPTION_PREFIX a configure-time option + or a configure.ac-settable value. + */ + lt_fatal (__FILE__, __LINE__, + "unrecognized %s option: '%s'", + ltwrapper_option_prefix, argv[i]); + } + /* otherwise ... */ + newargz[++newargc] = xstrdup (argv[i]); + } + newargz[++newargc] = NULL; + +EOF + cat <<EOF + /* The GNU banner must be the first non-error debug message */ + lt_debugprintf (__FILE__, __LINE__, "libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\n"); +EOF + cat <<"EOF" + lt_debugprintf (__FILE__, __LINE__, "(main) argv[0]: %s\n", argv[0]); + lt_debugprintf (__FILE__, __LINE__, "(main) program_name: %s\n", program_name); + + tmp_pathspec = find_executable (argv[0]); + if (tmp_pathspec == NULL) + lt_fatal (__FILE__, __LINE__, "couldn't find %s", argv[0]); + lt_debugprintf (__FILE__, __LINE__, + "(main) found exe (before symlink chase) at: %s\n", + tmp_pathspec); + + actual_cwrapper_path = chase_symlinks (tmp_pathspec); + lt_debugprintf (__FILE__, __LINE__, + "(main) found exe (after symlink chase) at: %s\n", + actual_cwrapper_path); + XFREE (tmp_pathspec); + + actual_cwrapper_name = xstrdup (base_name (actual_cwrapper_path)); + strendzap (actual_cwrapper_path, actual_cwrapper_name); + + /* wrapper name transforms */ + strendzap (actual_cwrapper_name, ".exe"); + tmp_pathspec = lt_extend_str (actual_cwrapper_name, ".exe", 1); + XFREE (actual_cwrapper_name); + actual_cwrapper_name = tmp_pathspec; + tmp_pathspec = 0; + + /* target_name transforms -- use actual target program name; might have lt- prefix */ + target_name = xstrdup (base_name (TARGET_PROGRAM_NAME)); + strendzap (target_name, ".exe"); + tmp_pathspec = lt_extend_str (target_name, ".exe", 1); + XFREE (target_name); + target_name = tmp_pathspec; + tmp_pathspec = 0; + + lt_debugprintf (__FILE__, __LINE__, + "(main) libtool target name: %s\n", + target_name); +EOF + + cat <<EOF + newargz[0] = + XMALLOC (char, (strlen (actual_cwrapper_path) + + strlen ("$objdir") + 1 + strlen (actual_cwrapper_name) + 1)); + strcpy (newargz[0], actual_cwrapper_path); + strcat (newargz[0], "$objdir"); + strcat (newargz[0], "/"); 
+EOF + + cat <<"EOF" + /* stop here, and copy so we don't have to do this twice */ + tmp_pathspec = xstrdup (newargz[0]); + + /* do NOT want the lt- prefix here, so use actual_cwrapper_name */ + strcat (newargz[0], actual_cwrapper_name); + + /* DO want the lt- prefix here if it exists, so use target_name */ + lt_argv_zero = lt_extend_str (tmp_pathspec, target_name, 1); + XFREE (tmp_pathspec); + tmp_pathspec = NULL; +EOF + + case $host_os in + mingw*) + cat <<"EOF" + { + char* p; + while ((p = strchr (newargz[0], '\\')) != NULL) + { + *p = '/'; + } + while ((p = strchr (lt_argv_zero, '\\')) != NULL) + { + *p = '/'; + } + } +EOF + ;; + esac + + cat <<"EOF" + XFREE (target_name); + XFREE (actual_cwrapper_path); + XFREE (actual_cwrapper_name); + + lt_setenv ("BIN_SH", "xpg4"); /* for Tru64 */ + lt_setenv ("DUALCASE", "1"); /* for MSK sh */ + /* Update the DLL searchpath. EXE_PATH_VALUE ($dllsearchpath) must + be prepended before (that is, appear after) LIB_PATH_VALUE ($temp_rpath) + because on Windows, both *_VARNAMEs are PATH but uninstalled + libraries must come first. */ + lt_update_exe_path (EXE_PATH_VARNAME, EXE_PATH_VALUE); + lt_update_lib_path (LIB_PATH_VARNAME, LIB_PATH_VALUE); + + lt_debugprintf (__FILE__, __LINE__, "(main) lt_argv_zero: %s\n", + nonnull (lt_argv_zero)); + for (i = 0; i < newargc; i++) + { + lt_debugprintf (__FILE__, __LINE__, "(main) newargz[%d]: %s\n", + i, nonnull (newargz[i])); + } + +EOF + + case $host_os in + mingw*) + cat <<"EOF" + /* execv doesn't actually work on mingw as expected on unix */ + newargz = prepare_spawn (newargz); + rval = _spawnv (_P_WAIT, lt_argv_zero, (const char * const *) newargz); + if (rval == -1) + { + /* failed to start process */ + lt_debugprintf (__FILE__, __LINE__, + "(main) failed to launch target \"%s\": %s\n", + lt_argv_zero, nonnull (strerror (errno))); + return 127; + } + return rval; +EOF + ;; + *) + cat <<"EOF" + execv (lt_argv_zero, newargz); + return rval; /* =127, but avoids unused variable warning */ +EOF + ;; + esac + + cat <<"EOF" +} + +void * +xmalloc (size_t num) +{ + void *p = (void *) malloc (num); + if (!p) + lt_fatal (__FILE__, __LINE__, "memory exhausted"); + + return p; +} + +char * +xstrdup (const char *string) +{ + return string ? strcpy ((char *) xmalloc (strlen (string) + 1), + string) : NULL; +} + +const char * +base_name (const char *name) +{ + const char *base; + +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + /* Skip over the disk name in MSDOS pathnames. */ + if (isalpha ((unsigned char) name[0]) && name[1] == ':') + name += 2; +#endif + + for (base = name; *name; name++) + if (IS_DIR_SEPARATOR (*name)) + base = name + 1; + return base; +} + +int +check_executable (const char *path) +{ + struct stat st; + + lt_debugprintf (__FILE__, __LINE__, "(check_executable): %s\n", + nonempty (path)); + if ((!path) || (!*path)) + return 0; + + if ((stat (path, &st) >= 0) + && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + return 1; + else + return 0; +} + +int +make_executable (const char *path) +{ + int rval = 0; + struct stat st; + + lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", + nonempty (path)); + if ((!path) || (!*path)) + return 0; + + if (stat (path, &st) >= 0) + { + rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); + } + return rval; +} + +/* Searches for the full path of the wrapper. Returns + newly allocated full path name if found, NULL otherwise + Does not chase symlinks, even on platforms that support them. 
+*/ +char * +find_executable (const char *wrapper) +{ + int has_slash = 0; + const char *p; + const char *p_next; + /* static buffer for getcwd */ + char tmp[LT_PATHMAX + 1]; + int tmp_len; + char *concat_name; + + lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", + nonempty (wrapper)); + + if ((wrapper == NULL) || (*wrapper == '\0')) + return NULL; + + /* Absolute path? */ +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + else + { +#endif + if (IS_DIR_SEPARATOR (wrapper[0])) + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + } +#endif + + for (p = wrapper; *p; p++) + if (*p == '/') + { + has_slash = 1; + break; + } + if (!has_slash) + { + /* no slashes; search PATH */ + const char *path = getenv ("PATH"); + if (path != NULL) + { + for (p = path; *p; p = p_next) + { + const char *q; + size_t p_len; + for (q = p; *q; q++) + if (IS_PATH_SEPARATOR (*q)) + break; + p_len = q - p; + p_next = (*q == '\0' ? q : q + 1); + if (p_len == 0) + { + /* empty path: current directory */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = + XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + } + else + { + concat_name = + XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, p, p_len); + concat_name[p_len] = '/'; + strcpy (concat_name + p_len + 1, wrapper); + } + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + } + /* not found in PATH; assume curdir */ + } + /* Relative path | not found in path: prepend cwd */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + return NULL; +} + +char * +chase_symlinks (const char *pathspec) +{ +#ifndef S_ISLNK + return xstrdup (pathspec); +#else + char buf[LT_PATHMAX]; + struct stat s; + char *tmp_pathspec = xstrdup (pathspec); + char *p; + int has_symlinks = 0; + while (strlen (tmp_pathspec) && !has_symlinks) + { + lt_debugprintf (__FILE__, __LINE__, + "checking path component for symlinks: %s\n", + tmp_pathspec); + if (lstat (tmp_pathspec, &s) == 0) + { + if (S_ISLNK (s.st_mode) != 0) + { + has_symlinks = 1; + break; + } + + /* search backwards for last DIR_SEPARATOR */ + p = tmp_pathspec + strlen (tmp_pathspec) - 1; + while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + p--; + if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + { + /* no more DIR_SEPARATORS left */ + break; + } + *p = '\0'; + } + else + { + lt_fatal (__FILE__, __LINE__, + "error accessing file \"%s\": %s", + tmp_pathspec, nonnull (strerror (errno))); + } + } + XFREE (tmp_pathspec); + + if (!has_symlinks) + { + return xstrdup (pathspec); + } + + tmp_pathspec = realpath (pathspec, buf); + if (tmp_pathspec == 0) + { + lt_fatal (__FILE__, 
__LINE__, + "could not follow symlinks for %s", pathspec); + } + return xstrdup (tmp_pathspec); +#endif +} + +char * +strendzap (char *str, const char *pat) +{ + size_t len, patlen; + + assert (str != NULL); + assert (pat != NULL); + + len = strlen (str); + patlen = strlen (pat); + + if (patlen <= len) + { + str += len - patlen; + if (strcmp (str, pat) == 0) + *str = '\0'; + } + return str; +} + +void +lt_debugprintf (const char *file, int line, const char *fmt, ...) +{ + va_list args; + if (lt_debug) + { + (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); + va_start (args, fmt); + (void) vfprintf (stderr, fmt, args); + va_end (args); + } +} + +static void +lt_error_core (int exit_status, const char *file, + int line, const char *mode, + const char *message, va_list ap) +{ + fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); + vfprintf (stderr, message, ap); + fprintf (stderr, ".\n"); + + if (exit_status >= 0) + exit (exit_status); +} + +void +lt_fatal (const char *file, int line, const char *message, ...) +{ + va_list ap; + va_start (ap, message); + lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); + va_end (ap); +} + +static const char * +nonnull (const char *s) +{ + return s ? s : "(null)"; +} + +static const char * +nonempty (const char *s) +{ + return (s && !*s) ? "(empty)" : nonnull (s); +} + +void +lt_setenv (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_setenv) setting '%s' to '%s'\n", + nonnull (name), nonnull (value)); + { +#ifdef HAVE_SETENV + /* always make a copy, for consistency with !HAVE_SETENV */ + char *str = xstrdup (value); + setenv (name, str, 1); +#else + int len = strlen (name) + 1 + strlen (value) + 1; + char *str = XMALLOC (char, len); + sprintf (str, "%s=%s", name, value); + if (putenv (str) != EXIT_SUCCESS) + { + XFREE (str); + } +#endif + } +} + +char * +lt_extend_str (const char *orig_value, const char *add, int to_end) +{ + char *new_value; + if (orig_value && *orig_value) + { + int orig_value_len = strlen (orig_value); + int add_len = strlen (add); + new_value = XMALLOC (char, add_len + orig_value_len + 1); + if (to_end) + { + strcpy (new_value, orig_value); + strcpy (new_value + orig_value_len, add); + } + else + { + strcpy (new_value, add); + strcpy (new_value + add_len, orig_value); + } + } + else + { + new_value = xstrdup (add); + } + return new_value; +} + +void +lt_update_exe_path (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + /* some systems can't cope with a ':'-terminated path #' */ + int len = strlen (new_value); + while (((len = strlen (new_value)) > 0) && IS_PATH_SEPARATOR (new_value[len-1])) + { + new_value[len-1] = '\0'; + } + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +void +lt_update_lib_path (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +EOF + case $host_os in + mingw*) + cat <<"EOF" + +/* Prepares an argument vector before calling spawn(). 
+ Note that spawn() does not by itself call the command interpreter + (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : + ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&v); + v.dwPlatformId == VER_PLATFORM_WIN32_NT; + }) ? "cmd.exe" : "command.com"). + Instead it simply concatenates the arguments, separated by ' ', and calls + CreateProcess(). We must quote the arguments since Win32 CreateProcess() + interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a + special way: + - Space and tab are interpreted as delimiters. They are not treated as + delimiters if they are surrounded by double quotes: "...". + - Unescaped double quotes are removed from the input. Their only effect is + that within double quotes, space and tab are treated like normal + characters. + - Backslashes not followed by double quotes are not special. + - But 2*n+1 backslashes followed by a double quote become + n backslashes followed by a double quote (n >= 0): + \" -> " + \\\" -> \" + \\\\\" -> \\" + */ +#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +char ** +prepare_spawn (char **argv) +{ + size_t argc; + char **new_argv; + size_t i; + + /* Count number of arguments. */ + for (argc = 0; argv[argc] != NULL; argc++) + ; + + /* Allocate new argument vector. */ + new_argv = XMALLOC (char *, argc + 1); + + /* Put quoted arguments into the new argument vector. */ + for (i = 0; i < argc; i++) + { + const char *string = argv[i]; + + if (string[0] == '\0') + new_argv[i] = xstrdup ("\"\""); + else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) + { + int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); + size_t length; + unsigned int backslashes; + const char *s; + char *quoted_string; + char *p; + + length = 0; + backslashes = 0; + if (quote_around) + length++; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + length += backslashes + 1; + length++; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + length += backslashes + 1; + + quoted_string = XMALLOC (char, length + 1); + + p = quoted_string; + backslashes = 0; + if (quote_around) + *p++ = '"'; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + { + unsigned int j; + for (j = backslashes + 1; j > 0; j--) + *p++ = '\\'; + } + *p++ = c; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + { + unsigned int j; + for (j = backslashes; j > 0; j--) + *p++ = '\\'; + *p++ = '"'; + } + *p = '\0'; + + new_argv[i] = quoted_string; + } + else + new_argv[i] = (char *) string; + } + new_argv[argc] = NULL; + + return new_argv; +} +EOF + ;; + esac + + cat <<"EOF" +void lt_dump_script (FILE* f) +{ +EOF + func_emit_wrapper yes | + $SED -n -e ' +s/^\(.\{79\}\)\(..*\)/\1\ +\2/ +h +s/\([\\"]\)/\\\1/g +s/$/\\n/ +s/\([^\n]*\).*/ fputs ("\1", f);/p +g +D' + cat <<"EOF" +} +EOF +} +# end: func_emit_cwrapperexe_src + +# func_win32_import_lib_p ARG +# True if ARG is an import lib, as indicated by $file_magic_cmd +func_win32_import_lib_p () +{ + $opt_debug + case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in + *import*) : ;; + *) false ;; + esac +} + +# func_mode_link arg... 
+func_mode_link () +{ + $opt_debug + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # which system we are compiling for in order to pass an extra + # flag for every libtool invocation. + # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll which has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. + allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args=$nonopt + base_compile="$nonopt $@" + compile_command=$nonopt + finalize_command=$nonopt + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + inst_prefix_dir= + new_inherited_linker_flags= + + avoid_version=no + bindir= + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + non_pic_objects= + precious_files_regex= + prefer_static_libs=no + preload=no + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + vinfo_number=no + weak_libs= + single_module="${wl}-single_module" + func_infer_tag $base_compile + + # We need to know -static, to get the right output filenames. + for arg + do + case $arg in + -shared) + test "$build_libtool_libs" != yes && \ + func_fatal_configuration "can not build a shared library" + build_old_libs=no + break + ;; + -all-static | -static | -static-libtool-libs) + case $arg in + -all-static) + if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then + func_warning "complete static linking is impossible in this configuration" + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + -static) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=built + ;; + -static-libtool-libs) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + esac + build_libtool_libs=no + build_old_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. + test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. + while test "$#" -gt 0; do + arg="$1" + shift + func_quote_for_eval "$arg" + qarg=$func_quote_for_eval_unquoted_result + func_append libtool_args " $func_quote_for_eval_result" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + func_append compile_command " @OUTPUT@" + func_append finalize_command " @OUTPUT@" + ;; + esac + + case $prev in + bindir) + bindir="$arg" + prev= + continue + ;; + dlfiles|dlprefiles) + if test "$preload" = no; then + # Add the symbol object into the linking commands. + func_append compile_command " @SYMFILE@" + func_append finalize_command " @SYMFILE@" + preload=yes + fi + case $arg in + *.la | *.lo) ;; # We handle these cases below. 
+ force) + if test "$dlself" = no; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test "$prev" = dlprefiles; then + dlself=yes + elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test "$prev" = dlfiles; then + func_append dlfiles " $arg" + else + func_append dlprefiles " $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols="$arg" + test -f "$arg" \ + || func_fatal_error "symbol file \`$arg' does not exist" + prev= + continue + ;; + expsyms_regex) + export_symbols_regex="$arg" + prev= + continue + ;; + framework) + case $host in + *-*-darwin*) + case "$deplibs " in + *" $qarg.ltframework "*) ;; + *) func_append deplibs " $qarg.ltframework" # this is fixed later + ;; + esac + ;; + esac + prev= + continue + ;; + inst_prefix) + inst_prefix_dir="$arg" + prev= + continue + ;; + objectlist) + if test -f "$arg"; then + save_arg=$arg + moreargs= + for fil in `cat "$save_arg"` + do +# func_append moreargs " $fil" + arg=$fil + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test "$pic_object" = none && + test "$non_pic_object" = none; then + func_fatal_error "cannot find name of object for \`$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + if test "$pic_object" != none; then + # Prepend the subdirectory the object is found in. + pic_object="$xdir$pic_object" + + if test "$prev" = dlfiles; then + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test "$prev" = dlprefiles; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg="$pic_object" + fi + + # Non-PIC object. + if test "$non_pic_object" != none; then + # Prepend the subdirectory the object is found in. + non_pic_object="$xdir$non_pic_object" + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test "$pic_object" = none ; then + arg="$non_pic_object" + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object="$pic_object" + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "\`$arg' is not a valid libtool object" + fi + fi + done + else + func_fatal_error "link input file \`$arg' does not exist" + fi + arg=$save_arg + prev= + continue + ;; + precious_regex) + precious_files_regex="$arg" + prev= + continue + ;; + release) + release="-$arg" + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. 
+ case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + if test "$prev" = rpath; then + case "$rpath " in + *" $arg "*) ;; + *) func_append rpath " $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) func_append xrpath " $arg" ;; + esac + fi + prev= + continue + ;; + shrext) + shrext_cmds="$arg" + prev= + continue + ;; + weak) + func_append weak_libs " $arg" + prev= + continue + ;; + xcclinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xcompiler) + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xlinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $wl$qarg" + prev= + func_append compile_command " $wl$qarg" + func_append finalize_command " $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n "$prev" + + prevarg="$arg" + + case $arg in + -all-static) + if test -n "$link_static_flag"; then + # See comment for -static flag below, for more details. + func_append compile_command " $link_static_flag" + func_append finalize_command " $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + func_fatal_error "\`-allow-undefined' must not be used because it is the default" + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -bindir) + prev=bindir + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + func_fatal_error "more than one -exported-symbols argument is not allowed" + fi + if test "X$arg" = "X-export-symbols"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -framework) + prev=framework + continue + ;; + + -inst-prefix-dir) + prev=inst_prefix + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | /*-*-irix*) + func_append compile_command " $arg" + func_append finalize_command " $arg" + ;; + esac + continue + ;; + + -L*) + func_stripname "-L" '' "$arg" + if test -z "$func_stripname_result"; then + if test "$#" -gt 0; then + func_fatal_error "require no space between \`-L' and \`$1'" + else + func_fatal_error "need path for \`-L' option" + fi + fi + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + test -z "$absdir" && \ + func_fatal_error "cannot determine absolute directory name of \`$dir'" + dir="$absdir" + ;; + esac + case "$deplibs " in + *" -L$dir "* | *" $arg "*) + # Will only happen for absolute or sysroot arguments + ;; + *) + # Preserve sysroot, but never include relative directories + case $dir in + [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; + *) func_append deplibs " -L$dir" ;; + esac + func_append lib_search_path " $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$dir:"*) ;; + ::) dllsearchpath=$dir;; + *) func_append dllsearchpath ":$dir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + continue + ;; + + -l*) + if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-os2*) + # These systems don't actually have a C library (as such) + test "X$arg" = "X-lc" && continue + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. + test "X$arg" = "X-lc" && continue + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C and math libraries are in the System framework + func_append deplibs " System.ltframework" + continue + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + test "X$arg" = "X-lc" && continue + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + test "X$arg" = "X-lc" && continue + ;; + esac + elif test "X$arg" = "X-lc_r"; then + case $host in + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + func_append deplibs " $arg" + continue + ;; + + -module) + module=yes + continue + ;; + + # Tru64 UNIX uses -model [arg] to determine the layout of C++ + # classes, name mangling, and exception handling. + # Darwin uses the -arch flag to determine output architecture. + -model|-arch|-isysroot|--sysroot) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + prev=xcompiler + continue + ;; + + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + continue + ;; + + -multi_module) + single_module="${wl}-multi_module" + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) + # The PATH hackery in wrapper scripts is required on Windows + # and Darwin in order for the loader to find any dlls it needs. 
+ func_warning "\`-no-install' is ignored for $host" + func_warning "assuming \`-no-fast-install' instead" + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -objectlist) + prev=objectlist + continue + ;; + + -o) prev=output ;; + + -precious-files-regex) + prev=precious_regex + continue + ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + func_stripname '-R' '' "$arg" + dir=$func_stripname_result + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + =*) + func_stripname '=' '' "$dir" + dir=$lt_sysroot$func_stripname_result + ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + continue + ;; + + -shared) + # The effects of -shared are defined in a previous loop. + continue + ;; + + -shrext) + prev=shrext + continue + ;; + + -static | -static-libtool-libs) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. + continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -version-number) + prev=vinfo + vinfo_number=yes + continue + ;; + + -weak) + prev=weak + continue + ;; + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + func_quote_for_eval "$flag" + func_append arg " $func_quote_for_eval_result" + func_append compiler_flags " $func_quote_for_eval_result" + done + IFS="$save_ifs" + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Wl,*) + func_stripname '-Wl,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + func_quote_for_eval "$flag" + func_append arg " $wl$func_quote_for_eval_result" + func_append compiler_flags " $wl$func_quote_for_eval_result" + func_append linker_flags " $func_quote_for_eval_result" + done + IFS="$save_ifs" + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + -XCClinker) + prev=xcclinker + continue + ;; + + # -msg_* for osf cc + -msg_*) + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + + # Flags to be passed through unchanged, with rationale: + # -64, -mips[0-9] enable 64-bit mode for the SGI compiler + # -r[0-9][0-9]* specify processor for the SGI compiler + # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler + # +DA*, +DD* enable 64-bit mode for the HP compiler + # -q* compiler args for the IBM compiler + # -m*, -t[45]*, -txscale* architecture-specific flags for GCC + # -F/path path to uninstalled frameworks, gcc on darwin + # -p, -pg, --coverage, -fprofile-* profiling flags for GCC + # @file GCC response files + # -tp=* Portland pgcc target processor selection + # --sysroot=* for sysroot support + # -O*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization + -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ + -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ + -O*|-flto*|-fwhopr*|-fuse-linker-plugin) + func_quote_for_eval 
"$arg" + arg="$func_quote_for_eval_result" + func_append compile_command " $arg" + func_append finalize_command " $arg" + func_append compiler_flags " $arg" + continue + ;; + + # Some other compiler flag. + -* | +*) + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + + *.$objext) + # A standard object. + func_append objs " $arg" + ;; + + *.lo) + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test "$pic_object" = none && + test "$non_pic_object" = none; then + func_fatal_error "cannot find name of object for \`$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + if test "$pic_object" != none; then + # Prepend the subdirectory the object is found in. + pic_object="$xdir$pic_object" + + if test "$prev" = dlfiles; then + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test "$prev" = dlprefiles; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg="$pic_object" + fi + + # Non-PIC object. + if test "$non_pic_object" != none; then + # Prepend the subdirectory the object is found in. + non_pic_object="$xdir$non_pic_object" + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test "$pic_object" = none ; then + arg="$non_pic_object" + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object="$pic_object" + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "\`$arg' is not a valid libtool object" + fi + fi + ;; + + *.$libext) + # An archive. + func_append deplibs " $arg" + func_append old_deplibs " $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + func_resolve_sysroot "$arg" + if test "$prev" = dlfiles; then + # This library was specified with -dlopen. + func_append dlfiles " $func_resolve_sysroot_result" + prev= + elif test "$prev" = dlprefiles; then + # The library was specified with -dlpreopen. + func_append dlprefiles " $func_resolve_sysroot_result" + prev= + else + func_append deplibs " $func_resolve_sysroot_result" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + esac # arg + + # Now actually substitute the argument into the commands. 
+ if test -n "$arg"; then + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + done # argument parsing loop + + test -n "$prev" && \ + func_fatal_help "the \`$prevarg' option requires an argument" + + if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + func_basename "$output" + outputname="$func_basename_result" + libobjs_save="$libobjs" + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$ECHO \"\${$shlibpath_var}\" \| \$SED \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + func_dirname "$output" "/" "" + output_objdir="$func_dirname_result$objdir" + func_to_tool_file "$output_objdir/" + tool_output_objdir=$func_to_tool_file_result + # Create the object directory. + func_mkdir_p "$output_objdir" + + # Determine the type of output + case $output in + "") + func_fatal_help "you must specify an output file" + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. + esac + + specialdeplibs= + + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. -la -lb -la) + for deplib in $deplibs; do + if $opt_preserve_dup_deps ; then + case "$libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append libs " $deplib" + done + + if test "$linkmode" = lib; then + libs="$predeps $libs $compiler_lib_search_path $postdeps" + + # Compute libraries that are listed more than once in $predeps + # $postdeps and mark them as special (i.e., whose duplicates are + # not to be eliminated). + pre_post_deps= + if $opt_duplicate_compiler_generated_deps; then + for pre_post_dep in $predeps $postdeps; do + case "$pre_post_deps " in + *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; + esac + func_append pre_post_deps " $pre_post_dep" + done + fi + pre_post_deps= + fi + + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + + case $linkmode in + lib) + passes="conv dlpreopen link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file" + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=no + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + + for pass in $passes; do + # The preopen pass in lib mode reverses $deplibs; put it back here + # so that -L comes before libs that need it for instance... 
+ if test "$linkmode,$pass" = "lib,link"; then + ## FIXME: Find the place where the list is rebuilt in the wrong + ## order, and fix it there properly + tmp_deplibs= + for deplib in $deplibs; do + tmp_deplibs="$deplib $tmp_deplibs" + done + deplibs="$tmp_deplibs" + fi + + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan"; then + libs="$deplibs" + deplibs= + fi + if test "$linkmode" = prog; then + case $pass in + dlopen) libs="$dlfiles" ;; + dlpreopen) libs="$dlprefiles" ;; + link) + libs="$deplibs %DEPLIBS%" + test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs" + ;; + esac + fi + if test "$linkmode,$pass" = "lib,dlpreopen"; then + # Collect and forward deplibs of preopened libtool libs + for lib in $dlprefiles; do + # Ignore non-libtool-libs + dependency_libs= + func_resolve_sysroot "$lib" + case $lib in + *.la) func_source "$func_resolve_sysroot_result" ;; + esac + + # Collect preopened libtool deplibs, except any this library + # has declared as weak libs + for deplib in $dependency_libs; do + func_basename "$deplib" + deplib_base=$func_basename_result + case " $weak_libs " in + *" $deplib_base "*) ;; + *) func_append deplibs " $deplib" ;; + esac + done + done + libs="$dlprefiles" + fi + if test "$pass" = dlopen; then + # Collect dlpreopened libraries + save_deplibs="$deplibs" + deplibs= + fi + + for deplib in $libs; do + lib= + found=no + case $deplib in + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append compiler_flags " $deplib" + if test "$linkmode" = lib ; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -l*) + if test "$linkmode" != lib && test "$linkmode" != prog; then + func_warning "\`-l' is ignored for archives/objects" + continue + fi + func_stripname '-l' '' "$deplib" + name=$func_stripname_result + if test "$linkmode" = lib; then + searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" + else + searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" + fi + for searchdir in $searchdirs; do + for search_ext in .la $std_shrext .so .a; do + # Search the libtool library + lib="$searchdir/lib${name}${search_ext}" + if test -f "$lib"; then + if test "$search_ext" = ".la"; then + found=yes + else + found=no + fi + break 2 + fi + done + done + if test "$found" != yes; then + # deplib doesn't seem to be a libtool library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + else # deplib is a libtool library + # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, + # We need to do some special things here, and not later. 
+ if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $deplib "*) + if func_lalib_p "$lib"; then + library_names= + old_library= + func_source "$lib" + for l in $old_library $library_names; do + ll="$l" + done + if test "X$ll" = "X$old_library" ; then # only static version available + found=no + func_dirname "$lib" "" "." + ladir="$func_dirname_result" + lib=$ladir/$old_library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + fi + ;; + *) ;; + esac + fi + fi + ;; # -l + *.ltframework) + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + if test "$linkmode" = lib ; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test "$pass" = conv && continue + newdependency_libs="$deplib $newdependency_libs" + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + prog) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + continue + fi + if test "$pass" = scan; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + *) + func_warning "\`-L' is ignored for archives/objects" + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test "$pass" = link; then + func_stripname '-R' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) + func_resolve_sysroot "$deplib" + lib=$func_resolve_sysroot_result + ;; + *.$libext) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + # Linking convenience modules into shared libraries is allowed, + # but linking other static libraries is non-portable. + case " $dlpreconveniencelibs " in + *" $deplib "*) ;; + *) + valid_a_lib=no + case $deplibs_check_method in + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + valid_a_lib=yes + fi + ;; + pass_all) + valid_a_lib=yes + ;; + esac + if test "$valid_a_lib" != yes; then + echo + $ECHO "*** Warning: Trying to link with static lib archive $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not use here." 
+ else + echo + $ECHO "*** Warning: Linking the shared library $output against the" + $ECHO "*** static library $deplib is not portable!" + deplibs="$deplib $deplibs" + fi + ;; + esac + continue + ;; + prog) + if test "$pass" != link; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + elif test "$linkmode" = prog; then + if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + func_append newdlprefiles " $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append newdlfiles " $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=yes + continue + ;; + esac # case $deplib + + if test "$found" = yes || test -f "$lib"; then : + else + func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'" + fi + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$lib" \ + || func_fatal_error "\`$lib' is not a valid libtool archive" + + func_dirname "$lib" "" "." + ladir="$func_dirname_result" + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + inherited_linker_flags= + # If the library was installed with an old release of libtool, + # it will not redefine variables installed, or shouldnotlink + installed=yes + shouldnotlink=no + avoidtemprpath= + + + # Read the .la file + func_source "$lib" + + # Convert "-framework foo" to "foo.ltframework" + if test -n "$inherited_linker_flags"; then + tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` + for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do + case " $new_inherited_linker_flags " in + *" $tmp_inherited_linker_flag "*) ;; + *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; + esac + done + fi + dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan" || + { test "$linkmode" != prog && test "$linkmode" != lib; }; then + test -n "$dlopen" && func_append dlfiles " $dlopen" + test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" + fi + + if test "$pass" = conv; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + func_fatal_error "cannot find name of link library for \`$lib'" + fi + # It is a libtool convenience library, so add in its objects. + func_append convenience " $ladir/$objdir/$old_library" + func_append old_convenience " $ladir/$objdir/$old_library" + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done + elif test "$linkmode" != prog && test "$linkmode" != lib; then + func_fatal_error "\`$lib' is not a convenience library" + fi + continue + fi # $pass = conv + + + # Get the name of the library we link against. 
+ linklib= + if test -n "$old_library" && + { test "$prefer_static_libs" = yes || + test "$prefer_static_libs,$installed" = "built,no"; }; then + linklib=$old_library + else + for l in $old_library $library_names; do + linklib="$l" + done + fi + if test -z "$linklib"; then + func_fatal_error "cannot find name of link library for \`$lib'" + fi + + # This library was specified with -dlopen. + if test "$pass" = dlopen; then + if test -z "$libdir"; then + func_fatal_error "cannot -dlopen a convenience library: \`$lib'" + fi + if test -z "$dlname" || + test "$dlopen_support" != yes || + test "$build_libtool_libs" = no; then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. We also need to preload any + # dependent libraries so libltdl's deplib preloader doesn't + # bomb out in the load deplibs phase. + func_append dlprefiles " $lib $dependency_libs" + else + func_append newdlfiles " $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. + case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + func_warning "cannot determine absolute directory name of \`$ladir'" + func_warning "passing it literally to the linker, although it might fail" + abs_ladir="$ladir" + fi + ;; + esac + func_basename "$lib" + laname="$func_basename_result" + + # Find the relevant object directory and library name. + if test "X$installed" = Xyes; then + if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + func_warning "library \`$lib' was moved." + dir="$ladir" + absdir="$abs_ladir" + libdir="$abs_ladir" + else + dir="$lt_sysroot$libdir" + absdir="$lt_sysroot$libdir" + fi + test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes + else + if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then + dir="$ladir" + absdir="$abs_ladir" + # Remove this search path later + func_append notinst_path " $abs_ladir" + else + dir="$ladir/$objdir" + absdir="$abs_ladir/$objdir" + # Remove this search path later + func_append notinst_path " $abs_ladir" + fi + fi # $installed = yes + func_stripname 'lib' '.la' "$laname" + name=$func_stripname_result + + # This library was specified with -dlpreopen. + if test "$pass" = dlpreopen; then + if test -z "$libdir" && test "$linkmode" = prog; then + func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'" + fi + case "$host" in + # special handling for platforms with PE-DLLs. + *cygwin* | *mingw* | *cegcc* ) + # Linker will automatically link against shared library if both + # static and shared are present. Therefore, ensure we extract + # symbols from the import library if a shared library is present + # (otherwise, the dlopen module name will be incorrect). We do + # this by putting the import library name into $newdlprefiles. + # We recover the dlopen module name by 'saving' the la file + # name in a special purpose variable, and (later) extracting the + # dlname from the la file. + if test -n "$dlname"; then + func_tr_sh "$dir/$linklib" + eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" + func_append newdlprefiles " $dir/$linklib" + else + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. 
+ test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + fi + ;; + * ) + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. + elif test -n "$dlname"; then + func_append newdlprefiles " $dir/$dlname" + else + func_append newdlprefiles " $dir/$linklib" + fi + ;; + esac + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test "$linkmode" = lib; then + deplibs="$dir/$old_library $deplibs" + elif test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" # used for prog,scan pass + fi + continue + fi + + + if test "$linkmode" = prog && test "$pass" != link; then + func_append newlib_search_path " $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=no + if test "$link_all_deplibs" != no || test -z "$library_names" || + test "$build_libtool_libs" = no; then + linkalldeplibs=yes + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + esac + # Need to link against all dependency_libs? + if test "$linkalldeplibs" = yes; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + if test "$linkmode,$pass" = "prog,link"; then + if test -n "$library_names" && + { { test "$prefer_static_libs" = no || + test "$prefer_static_libs,$installed" = "built,yes"; } || + test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then + # Make sure the rpath contains only unique directories. + case "$temp_rpath:" in + *"$absdir:"*) ;; + *) func_append temp_rpath "$absdir:" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi # $linkmode,$pass = prog,link... 
+ + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + fi + + link_static=no # Whether the deplib will be linked statically + use_static_libs=$prefer_static_libs + if test "$use_static_libs" = built && test "$installed" = yes; then + use_static_libs=no + fi + if test -n "$library_names" && + { test "$use_static_libs" = no || test -z "$old_library"; }; then + case $host in + *cygwin* | *mingw* | *cegcc*) + # No point in relinking DLLs because paths are not encoded + func_append notinst_deplibs " $lib" + need_relink=no + ;; + *) + if test "$installed" = no; then + func_append notinst_deplibs " $lib" + need_relink=yes + fi + ;; + esac + # This is a shared library + + # Warn about portability, can't link against -module's on some + # systems (darwin). Don't bleat about dlopened modules though! + dlopenmodule="" + for dlpremoduletest in $dlprefiles; do + if test "X$dlpremoduletest" = "X$lib"; then + dlopenmodule="$dlpremoduletest" + break + fi + done + if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then + echo + if test "$linkmode" = prog; then + $ECHO "*** Warning: Linking the executable $output against the loadable module" + else + $ECHO "*** Warning: Linking the shared library $output against the loadable module" + fi + $ECHO "*** $linklib is not portable!" + fi + if test "$linkmode" = lib && + test "$hardcode_into_libs" = yes; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + shift + realname="$1" + shift + libname=`eval "\\$ECHO \"$libname_spec\""` + # use dlname if we got it. it's perfectly good, no? + if test -n "$dlname"; then + soname="$dlname" + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin* | mingw* | *cegcc*) + func_arith $current - $age + major=$func_arith_result + versuffix="-$major" + ;; + esac + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot="$soname" + func_basename "$soroot" + soname="$func_basename_result" + func_stripname 'lib' '.dll' "$soname" + newlib=libimp-$func_stripname_result.a + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + func_verbose "extracting exported symbol list from \`$soname'" + func_execute_cmds "$extract_expsyms_cmds" 'exit $?' + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + func_verbose "generating import library for \`$soname'" + func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' 
+ fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n "$old_archive_from_expsyms_cmds" + + if test "$linkmode" = prog || test "$opt_mode" != relink; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test "$hardcode_direct" = no; then + add="$dir/$linklib" + case $host in + *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;; + *-*-sysv4*uw2*) add_dir="-L$dir" ;; + *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ + *-*-unixware7*) add_dir="-L$dir" ;; + *-*-darwin* ) + # if the lib is a (non-dlopened) module then we can not + # link against it, someone is ignoring the earlier warnings + if /usr/bin/file -L $add 2> /dev/null | + $GREP ": [^:]* bundle" >/dev/null ; then + if test "X$dlopenmodule" != "X$lib"; then + $ECHO "*** Warning: lib $linklib is a module, not a shared library" + if test -z "$old_library" ; then + echo + echo "*** And there doesn't seem to be a static archive available" + echo "*** The link will probably fail, sorry" + else + add="$dir/$old_library" + fi + elif test -n "$old_library"; then + add="$dir/$old_library" + fi + fi + esac + elif test "$hardcode_minus_L" = no; then + case $host in + *-*-sunos*) add_shlibpath="$dir" ;; + esac + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = no; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + relink) + if test "$hardcode_direct" = yes && + test "$hardcode_direct_absolute" = no; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$absdir" + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test "$lib_linked" != yes; then + func_fatal_configuration "unsupported hardcode properties" + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) func_append compile_shlibpath "$add_shlibpath:" ;; + esac + fi + if test "$linkmode" = prog; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test "$hardcode_direct" != yes && + test "$hardcode_minus_L" != yes && + test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + fi + fi + fi + + if test "$linkmode" = prog || test "$opt_mode" = relink; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test "$hardcode_direct" = yes && + test "$hardcode_direct_absolute" = no; then + add="$libdir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$libdir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + add="-l$name" + elif test "$hardcode_automatic" = yes; then + if test -n "$inst_prefix_dir" && + test -f "$inst_prefix_dir$libdir/$linklib" ; then + add="$inst_prefix_dir$libdir/$linklib" + else + add="$libdir/$linklib" + fi + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir="-L$libdir" + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add="-l$name" + fi + + if test "$linkmode" = prog; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test "$linkmode" = prog; then + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test "$hardcode_direct" != unsupported; then + test -n "$old_library" && linklib="$old_library" + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test "$build_libtool_libs" = yes; then + # Not a shared library + if test "$deplibs_check_method" != pass_all; then + # We're trying link a shared library against a static one + # but the system doesn't support it. + + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + $ECHO "*** Warning: This system can not link to static lib archive $lib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + if test "$module" = yes; then + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." + fi + if test "$build_old_libs" = no; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? 
+ + if test "$linkmode" = lib; then + if test -n "$dependency_libs" && + { test "$hardcode_into_libs" != yes || + test "$build_old_libs" = yes || + test "$link_static" = yes; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) func_stripname '-R' '' "$libdir" + temp_xrpath=$func_stripname_result + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) func_append xrpath " $temp_xrpath";; + esac;; + *) func_append temp_deplibs " $libdir";; + esac + done + dependency_libs="$temp_deplibs" + fi + + func_append newlib_search_path " $absdir" + # Link against this library + test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result";; + *) func_resolve_sysroot "$deplib" ;; + esac + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $func_resolve_sysroot_result "*) + func_append specialdeplibs " $func_resolve_sysroot_result" ;; + esac + fi + func_append tmp_libs " $func_resolve_sysroot_result" + done + + if test "$link_all_deplibs" != no; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + path= + case $deplib in + -L*) path="$deplib" ;; + *.la) + func_resolve_sysroot "$deplib" + deplib=$func_resolve_sysroot_result + func_dirname "$deplib" "" "." + dir=$func_dirname_result + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + func_warning "cannot determine absolute directory name of \`$dir'" + absdir="$dir" + fi + ;; + esac + if $GREP "^installed=no" $deplib > /dev/null; then + case $host in + *-*-darwin*) + depdepl= + eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` + if test -n "$deplibrary_names" ; then + for tmp in $deplibrary_names ; do + depdepl=$tmp + done + if test -f "$absdir/$objdir/$depdepl" ; then + depdepl="$absdir/$objdir/$depdepl" + darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + if test -z "$darwin_install_name"; then + darwin_install_name=`${OTOOL64} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + fi + func_append compiler_flags " ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}" + func_append linker_flags " -dylib_file ${darwin_install_name}:${depdepl}" + path= + fi + fi + ;; + *) + path="-L$absdir/$objdir" + ;; + esac + else + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + test -z "$libdir" && \ + func_fatal_error "\`$deplib' is not a valid libtool archive" + test "$absdir" != "$libdir" && \ + func_warning "\`$deplib' seems to be moved" + + path="-L$absdir" + fi + ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$path $deplibs" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test "$pass" = link; then + if test "$linkmode" = "prog"; then + compile_deplibs="$new_inherited_linker_flags $compile_deplibs" + finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" + else + compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + fi + fi + dependency_libs="$newdependency_libs" + if test "$pass" = dlpreopen; then + # Link the dlpreopened libraries before other libraries + 
for deplib in $save_deplibs; do + deplibs="$deplib $deplibs" + done + fi + if test "$pass" != dlopen; then + if test "$pass" != conv; then + # Make sure lib_search_path contains only unique directories. + lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) func_append lib_search_path " $dir" ;; + esac + done + newlib_search_path= + fi + + if test "$linkmode,$pass" != "prog,link"; then + vars="deplibs" + else + vars="compile_deplibs finalize_deplibs" + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + # FIXME: Pedantically, this is the right thing to do, so + # that some nasty dependency loop isn't accidentally + # broken: + #new_libs="$deplib $new_libs" + # Pragmatically, this seems to cause very few problems in + # practice: + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + -R*) ;; + *) + # And here is the reason: when a library appears more + # than once as an explicit dependence of a library, or + # is implicitly linked in more than once by the + # compiler, it is considered special, and multiple + # occurrences thereof are not removed. Compare this + # with having the same library being listed as a + # dependency of multiple other libraries: in this case, + # we know (pedantically, we assume) the library does not + # need to be listed more than once, so we keep only the + # last copy. This is not always right, but it is rare + # enough that we require users that really mean to play + # such unportable linking tricks to link the library + # using -Wl,-lname, so that libtool does not consider it + # for duplicate removal. + case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) func_append tmp_libs " $deplib" ;; + esac + ;; + *) func_append tmp_libs " $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + # Last step: remove runtime libs from dependency_libs + # (they stay in deplibs) + tmp_libs= + for i in $dependency_libs ; do + case " $predeps $postdeps $compiler_lib_search_path " in + *" $i "*) + i="" + ;; + esac + if test -n "$i" ; then + func_append tmp_libs " $i" + fi + done + dependency_libs=$tmp_libs + done # for pass + if test "$linkmode" = prog; then + dlfiles="$newdlfiles" + fi + if test "$linkmode" = prog || test "$linkmode" = lib; then + dlprefiles="$newdlprefiles" + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + func_warning "\`-dlopen' is ignored for archives" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "\`-l' and \`-L' are ignored for archives" ;; + esac + + test -n "$rpath" && \ + func_warning "\`-rpath' is ignored for archives" + + test -n "$xrpath" && \ + func_warning "\`-R' is ignored for archives" + + test -n "$vinfo" && \ + func_warning "\`-version-info/-version-number' is ignored for archives" + + test -n "$release" && \ + func_warning "\`-release' is ignored for archives" + + test -n "$export_symbols$export_symbols_regex" && \ + func_warning "\`-export-symbols' is ignored for archives" + + # Now set the variables for building old libraries. 
+ build_libtool_libs=no + oldlibs="$output" + func_append objs "$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form `libNAME.la'. + case $outputname in + lib*) + func_stripname 'lib' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + ;; + *) + test "$module" = no && \ + func_fatal_help "libtool library \`$output' must begin with \`lib'" + + if test "$need_lib_prefix" != no; then + # Add the "lib" prefix for modules if required + func_stripname '' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + else + func_stripname '' '.la' "$outputname" + libname=$func_stripname_result + fi + ;; + esac + + if test -n "$objs"; then + if test "$deplibs_check_method" != pass_all; then + func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs" + else + echo + $ECHO "*** Warning: Linking the shared library $output against the non-libtool" + $ECHO "*** objects $objs is not portable!" + func_append libobjs " $objs" + fi + fi + + test "$dlself" != no && \ + func_warning "\`-dlopen self' is ignored for libtool libraries" + + set dummy $rpath + shift + test "$#" -gt 1 && \ + func_warning "ignoring multiple \`-rpath's for a libtool library" + + install_libdir="$1" + + oldlibs= + if test -z "$rpath"; then + if test "$build_libtool_libs" = yes; then + # Building a libtool convenience library. + # Some compilers have problems with a `.al' extension so + # convenience libraries should have the same extension an + # archive normally would. + oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + test -n "$vinfo" && \ + func_warning "\`-version-info/-version-number' is ignored for convenience libraries" + + test -n "$release" && \ + func_warning "\`-release' is ignored for convenience libraries" + else + + # Parse the version information argument. + save_ifs="$IFS"; IFS=':' + set dummy $vinfo 0 0 0 + shift + IFS="$save_ifs" + + test -n "$7" && \ + func_fatal_help "too many parameters to \`-version-info'" + + # convert absolute version numbers to libtool ages + # this retains compatibility with .la files and attempts + # to make the code below a bit more comprehensible + + case $vinfo_number in + yes) + number_major="$1" + number_minor="$2" + number_revision="$3" + # + # There are really only two kinds -- those that + # use the current revision as the major version + # and those that subtract age and use age as + # a minor version. But, then there is irix + # which has an extra 1 added just for fun + # + case $version_type in + # correct linux to gnu/linux during the next big refactor + darwin|linux|osf|windows|none) + func_arith $number_major + $number_minor + current=$func_arith_result + age="$number_minor" + revision="$number_revision" + ;; + freebsd-aout|freebsd-elf|qnx|sunos) + current="$number_major" + revision="$number_minor" + age="0" + ;; + irix|nonstopux) + func_arith $number_major + $number_minor + current=$func_arith_result + age="$number_minor" + revision="$number_minor" + lt_irix_increment=no + ;; + *) + func_fatal_configuration "$modename: unknown library version type \`$version_type'" + ;; + esac + ;; + no) + current="$1" + revision="$2" + age="$3" + ;; + esac + + # Check that each of the things are valid numbers. 
+ case $current in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "CURRENT \`$current' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + case $revision in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "REVISION \`$revision' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + case $age in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "AGE \`$age' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + if test "$age" -gt "$current"; then + func_error "AGE \`$age' is greater than the current interface number \`$current'" + func_fatal_error "\`$vinfo' is not valid version information" + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + func_arith $current - $age + major=.$func_arith_result + versuffix="$major.$age.$revision" + # Darwin ld doesn't like 0 for these options... + func_arith $current + 1 + minor_current=$func_arith_result + xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision" + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + + freebsd-aout) + major=".$current" + versuffix=".$current.$revision"; + ;; + + freebsd-elf) + major=".$current" + versuffix=".$current" + ;; + + irix | nonstopux) + if test "X$lt_irix_increment" = "Xno"; then + func_arith $current - $age + else + func_arith $current - $age + 1 + fi + major=$func_arith_result + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring="$verstring_prefix$major.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test "$loop" -ne 0; do + func_arith $revision - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring="$verstring_prefix$major.$iface:$verstring" + done + + # Before this point, $major must not contain `.'. + major=.$major + versuffix="$major.$revision" + ;; + + linux) # correct to gnu/linux during the next big refactor + func_arith $current - $age + major=.$func_arith_result + versuffix="$major.$age.$revision" + ;; + + osf) + func_arith $current - $age + major=.$func_arith_result + versuffix=".$current.$age.$revision" + verstring="$current.$age.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$age + while test "$loop" -ne 0; do + func_arith $current - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring="$verstring:${iface}.0" + done + + # Make executables depend on our current version. + func_append verstring ":${current}.0" + ;; + + qnx) + major=".$current" + versuffix=".$current" + ;; + + sunos) + major=".$current" + versuffix=".$current.$revision" + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 filesystems. 
+ func_arith $current - $age + major=$func_arith_result + versuffix="-$major" + ;; + + *) + func_fatal_configuration "unknown library version type \`$version_type'" + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. + if test -z "$vinfo" && test -n "$release"; then + major= + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring= + ;; + *) + verstring="0.0" + ;; + esac + if test "$need_version" = no; then + versuffix= + else + versuffix=".0.0" + fi + fi + + # Remove version info from name if versioning should be avoided + if test "$avoid_version" = yes && test "$need_version" = no; then + major= + versuffix= + verstring="" + fi + + # Check to see if the archive will have undefined symbols. + if test "$allow_undefined" = yes; then + if test "$allow_undefined_flag" = unsupported; then + func_warning "undefined symbols not allowed in $host shared libraries" + build_libtool_libs=no + build_old_libs=yes + fi + else + # Don't allow undefined symbols. + allow_undefined_flag="$no_undefined_flag" + fi + + fi + + func_generate_dlsyms "$libname" "$libname" "yes" + func_append libobjs " $symfileobj" + test "X$libobjs" = "X " && libobjs= + + if test "$opt_mode" != relink; then + # Remove our outputs, but don't remove object files since they + # may have been created when compiling PIC objects. + removelist= + tempremovelist=`$ECHO "$output_objdir/*"` + for p in $tempremovelist; do + case $p in + *.$objext | *.gcno) + ;; + $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*) + if test "X$precious_files_regex" != "X"; then + if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 + then + continue + fi + fi + func_append removelist " $p" + ;; + *) ;; + esac + done + test -n "$removelist" && \ + func_show_eval "${RM}r \$removelist" + fi + + # Now set the variables for building old libraries. + if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then + func_append oldlibs " $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; $lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + #for path in $notinst_path; do + # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` + # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` + # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` + #done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. 
+ temp_xrpath= + for libdir in $xrpath; do + func_replace_sysroot "$libdir" + func_append temp_xrpath " -R$func_replace_sysroot_result" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles="$dlfiles" + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) func_append dlfiles " $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles="$dlprefiles" + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) func_append dlprefiles " $lib" ;; + esac + done + + if test "$build_libtool_libs" = yes; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + func_append deplibs " System.ltframework" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test "$build_libtool_need_lc" = "yes"; then + func_append deplibs " -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. + name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release="" + versuffix="" + major="" + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behavior. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. 
+ $opt_dry_run || $RM conftest.c + cat > conftest.c <<EOF + int main() { return 0; } +EOF + $opt_dry_run || $RM conftest + if $LTCC $LTCFLAGS -o conftest conftest.c $deplibs; then + ldd_output=`ldd conftest` + for i in $deplibs; do + case $i in + -l*) + func_stripname -l '' "$i" + name=$func_stripname_result + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $i "*) + func_append newdeplibs " $i" + i="" + ;; + esac + fi + if test -n "$i" ; then + libname=`eval "\\$ECHO \"$libname_spec\""` + deplib_matches=`eval "\\$ECHO \"$library_names_spec\""` + set dummy $deplib_matches; shift + deplib_match=$1 + if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then + func_append newdeplibs " $i" + else + droppeddeps=yes + echo + $ECHO "*** Warning: dynamic linker does not accept needed library $i." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which I believe you do not have" + echo "*** because a test_compile did reveal that the linker did not use it for" + echo "*** its dynamic dependency list that programs get resolved with at runtime." + fi + fi + ;; + *) + func_append newdeplibs " $i" + ;; + esac + done + else + # Error occurred in the first compile. Let's try to salvage + # the situation: Compile a separate program for each library. + for i in $deplibs; do + case $i in + -l*) + func_stripname -l '' "$i" + name=$func_stripname_result + $opt_dry_run || $RM conftest + if $LTCC $LTCFLAGS -o conftest conftest.c $i; then + ldd_output=`ldd conftest` + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $i "*) + func_append newdeplibs " $i" + i="" + ;; + esac + fi + if test -n "$i" ; then + libname=`eval "\\$ECHO \"$libname_spec\""` + deplib_matches=`eval "\\$ECHO \"$library_names_spec\""` + set dummy $deplib_matches; shift + deplib_match=$1 + if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then + func_append newdeplibs " $i" + else + droppeddeps=yes + echo + $ECHO "*** Warning: dynamic linker does not accept needed library $i." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because a test_compile did reveal that the linker did not use this one" + echo "*** as a dynamic dependency that programs can get resolved with at runtime." + fi + fi + else + droppeddeps=yes + echo + $ECHO "*** Warning! Library $i is needed by this library but I was not able to" + echo "*** make it link in! You will probably need to install it or some" + echo "*** library that it depends on before this library will be fully" + echo "*** functional. Installing it before continuing would be even better." 
+ fi + ;; + *) + func_append newdeplibs " $i" + ;; + esac + done + fi + ;; + file_magic*) + set dummy $deplibs_check_method; shift + file_magic_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib="" + ;; + esac + fi + if test -n "$a_deplib" ; then + libname=`eval "\\$ECHO \"$libname_spec\""` + if test -n "$file_magic_glob"; then + libnameglob=`func_echo_all "$libname" | $SED -e $file_magic_glob` + else + libnameglob=$libname + fi + test "$want_nocaseglob" = yes && nocaseglob=`shopt -p nocaseglob` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + if test "$want_nocaseglob" = yes; then + shopt -s nocaseglob + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + $nocaseglob + else + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + fi + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null | + $GREP " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. + # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? + potlib="$potent_lib" + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; + *) potlib=`$ECHO "$potlib" | $SED 's,[^/]*$,,'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | + $SED -e 10q | + $EGREP "$file_magic_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib="" + break 2 + fi + done + done + fi + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + $ECHO "*** with $libname but no candidates were found. (...for file magic test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a file magic. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib="" + ;; + esac + fi + if test -n "$a_deplib" ; then + libname=`eval "\\$ECHO \"$libname_spec\""` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib="$potent_lib" # see symlink-check above in file_magic test + if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ + $EGREP "$match_pattern_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib="" + break 2 + fi + done + done + fi + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a regex pattern. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. + ;; + none | unknown | *) + newdeplibs="" + tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + for i in $predeps $postdeps ; do + # can't use Xsed below, because $i might contain '/' + tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s,$i,,"` + done + fi + case $tmp_deplibs in + *[!\ \ ]*) + echo + if test "X$deplibs_check_method" = "Xnone"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + ;; + esac + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library with the System framework + newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + if test "$droppeddeps" = yes; then + if test "$module" = yes; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + $ECHO "*** dependencies of module $libname. Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." 
+ fi + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." + + if test "$allow_undefined" = no; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + case $host in + *-*-darwin*) + newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + deplibs="$new_libs" + + # All the library-specific variables (install_libdir is set above). + library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test "$build_libtool_libs" = yes; then + # Remove ${wl} instances when linking with ld. + # FIXME: should test the right _cmds variable. + case $archive_cmds in + *\$LD\ *) wl= ;; + esac + if test "$hardcode_into_libs" = yes; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath="$finalize_rpath" + test "$opt_mode" != relink && rpath="$compile_rpath$rpath" + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + func_replace_sysroot "$libdir" + libdir=$func_replace_sysroot_result + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append dep_rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath="$finalize_shlibpath" + test "$opt_mode" != relink && shlibpath="$compile_shlibpath$shlibpath" + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval shared_ext=\"$shrext_cmds\" + eval library_names=\"$library_names_spec\" + set dummy $library_names + shift + realname="$1" + shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + if test -z "$dlname"; then + dlname=$soname + fi + + lib="$output_objdir/$realname" + linknames= + for link + do + func_append linknames " $link" + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` + test "X$libobjs" = "X " && libobjs= + + delfiles= + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" + export_symbols="$output_objdir/$libname.uexp" + func_append delfiles " $export_symbols" + fi + + orig_export_symbols= + case $host_os in + cygwin* | mingw* | cegcc*) + if test -n "$export_symbols" && test -z "$export_symbols_regex"; then + # exporting using user supplied symfile + if test "x`$SED 1q $export_symbols`" != xEXPORTS; then + # and it's NOT already a .def file. Must figure out + # which of the given symbols are data symbols and tag + # them as such. So, trigger use of export_symbols_cmds. + # export_symbols gets reassigned inside the "prepare + # the list of exported symbols" if statement, so the + # include_expsyms logic still works. + orig_export_symbols="$export_symbols" + export_symbols= + always_export_symbols=yes + fi + fi + ;; + esac + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then + func_verbose "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $opt_dry_run || $RM $export_symbols + cmds=$export_symbols_cmds + save_ifs="$IFS"; IFS='~' + for cmd1 in $cmds; do + IFS="$save_ifs" + # Take the normal branch if the nm_file_list_spec branch + # doesn't work or if tool conversion is not needed. + case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac + if test "$try_normal_branch" = yes \ + && { test "$len" -lt "$max_cmd_len" \ + || test "$max_cmd_len" -le -1; } + then + func_show_eval "$cmd" 'exit $?' 
+ skipped_export=false + elif test -n "$nm_file_list_spec"; then + func_basename "$output" + output_la=$func_basename_result + save_libobjs=$libobjs + save_output=$output + output=${output_objdir}/${output_la}.nm + func_to_tool_file "$output" + libobjs=$nm_file_list_spec$func_to_tool_file_result + func_append delfiles " $output" + func_verbose "creating $NM input file list: $output" + for obj in $save_libobjs; do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > "$output" + eval cmd=\"$cmd1\" + func_show_eval "$cmd" 'exit $?' + output=$save_output + libobjs=$save_libobjs + skipped_export=false + else + # The command line is too long to execute in one step. + func_verbose "using reloadable object file for export list..." + skipped_export=: + # Break out early, otherwise skipped_export may be + # set to false by a later but shorter cmd. + break + fi + done + IFS="$save_ifs" + if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols="$export_symbols" + test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + + tmp_deplibs= + for test_deplib in $deplibs; do + case " $convenience " in + *" $test_deplib "*) ;; + *) + func_append tmp_deplibs " $test_deplib" + ;; + esac + done + deplibs="$tmp_deplibs" + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec" && + test "$compiler_needs_object" = yes && + test -z "$libobjs"; then + # extract the archives, so we have objects to list. + # TODO: could optimize this to just extract one archive. + whole_archive_flag_spec= + fi + if test -n "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + else + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + fi + + if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + func_append linker_flags " $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test "$opt_mode" = relink; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? 
+ fi + + # Do each of the archive commands. + if test "$module" = yes && test -n "$module_cmds" ; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + eval test_cmds=\"$module_expsym_cmds\" + cmds=$module_expsym_cmds + else + eval test_cmds=\"$module_cmds\" + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval test_cmds=\"$archive_expsym_cmds\" + cmds=$archive_expsym_cmds + else + eval test_cmds=\"$archive_cmds\" + cmds=$archive_cmds + fi + fi + + if test "X$skipped_export" != "X:" && + func_len " $test_cmds" && + len=$func_len_result && + test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + : + else + # The command line is too long to link in one step, link piecewise + # or, if using GNU ld and skipped_export is not :, use a linker + # script. + + # Save the value of $output and $libobjs because we want to + # use them later. If we have whole_archive_flag_spec, we + # want to use save_libobjs as it was before + # whole_archive_flag_spec was expanded, because we can't + # assume the linker understands whole_archive_flag_spec. + # This may have to be revisited, in case too many + # convenience libraries get linked in and end up exceeding + # the spec. + if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + fi + save_output=$output + func_basename "$output" + output_la=$func_basename_result + + # Clear the reloadable object creation command queue and + # initialize k to one. + test_cmds= + concat_cmds= + objlist= + last_robj= + k=1 + + if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then + output=${output_objdir}/${output_la}.lnkscript + func_verbose "creating GNU ld script: $output" + echo 'INPUT (' > $output + for obj in $save_libobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + echo ')' >> $output + func_append delfiles " $output" + func_to_tool_file "$output" + output=$func_to_tool_file_result + elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then + output=${output_objdir}/${output_la}.lnk + func_verbose "creating linker input file list: $output" + : > $output + set x $save_libobjs + shift + firstobj= + if test "$compiler_needs_object" = yes; then + firstobj="$1 " + shift + fi + for obj + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + func_append delfiles " $output" + func_to_tool_file "$output" + output=$firstobj\"$file_list_spec$func_to_tool_file_result\" + else + if test -n "$save_libobjs"; then + func_verbose "creating reloadable object files..." + output=$output_objdir/$output_la-${k}.$objext + eval test_cmds=\"$reload_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + + # Loop over the list of objects to be linked. + for obj in $save_libobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + if test "X$objlist" = X || + test "$len" -lt "$max_cmd_len"; then + func_append objlist " $obj" + else + # The command $test_cmds is almost too long, add a + # command to the queue. + if test "$k" -eq 1 ; then + # The first file doesn't have a previous command to add. + reload_objs=$objlist + eval concat_cmds=\"$reload_cmds\" + else + # All subsequent reloadable object files will link in + # the last one created. 
+ reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" + fi + last_robj=$output_objdir/$output_la-${k}.$objext + func_arith $k + 1 + k=$func_arith_result + output=$output_objdir/$output_la-${k}.$objext + objlist=" $obj" + func_len " $last_robj" + func_arith $len0 + $func_len_result + len=$func_arith_result + fi + done + # Handle the remaining objects by creating one last + # reloadable object file. All subsequent reloadable object + # files will link in the last one created. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\${concat_cmds}$reload_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\" + fi + func_append delfiles " $output" + + else + output= + fi + + if ${skipped_export-false}; then + func_verbose "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $opt_dry_run || $RM $export_symbols + libobjs=$output + # Append the command to create the export file. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + fi + + test -n "$save_libobjs" && + func_verbose "creating a temporary reloadable object file: $output" + + # Loop through the commands generated above and execute them. + save_ifs="$IFS"; IFS='~' + for cmd in $concat_cmds; do + IFS="$save_ifs" + $opt_silent || { + func_quote_for_expand "$cmd" + eval "func_echo $func_quote_for_expand_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS="$save_ifs" + + if test -n "$export_symbols_regex" && ${skipped_export-false}; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + + if ${skipped_export-false}; then + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols="$export_symbols" + test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + fi + + libobjs=$output + # Restore the value of output. 
+ output=$save_output + + if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + fi + # Expand the library linking commands again to reset the + # value of $libobjs for piecewise linking. + + # Do each of the archive commands. + if test "$module" = yes && test -n "$module_cmds" ; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + cmds=$module_expsym_cmds + else + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + cmds=$archive_expsym_cmds + else + cmds=$archive_cmds + fi + fi + fi + + if test -n "$delfiles"; then + # Append the command to remove temporary files to $cmds. + eval cmds=\"\$cmds~\$RM $delfiles\" + fi + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $opt_silent || { + func_quote_for_expand "$cmd" + eval "func_echo $func_quote_for_expand_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS="$save_ifs" + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? + + if test -n "$convenience"; then + if test -z "$whole_archive_flag_spec"; then + func_show_eval '${RM}r "$gentop"' + fi + fi + + exit $EXIT_SUCCESS + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test "$module" = yes || test "$export_dynamic" = yes; then + # On all known operating systems, these are identical. + dlname="$soname" + fi + fi + ;; + + obj) + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + func_warning "\`-dlopen' is ignored for objects" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "\`-l' and \`-L' are ignored for objects" ;; + esac + + test -n "$rpath" && \ + func_warning "\`-rpath' is ignored for objects" + + test -n "$xrpath" && \ + func_warning "\`-R' is ignored for objects" + + test -n "$vinfo" && \ + func_warning "\`-version-info' is ignored for objects" + + test -n "$release" && \ + func_warning "\`-release' is ignored for objects" + + case $output in + *.lo) + test -n "$objs$old_deplibs" && \ + func_fatal_error "cannot build library object \`$output' from non-libtool objects" + + libobj=$output + func_lo2o "$libobj" + obj=$func_lo2o_result + ;; + *) + libobj= + obj="$output" + ;; + esac + + # Delete the old objects. + $opt_dry_run || $RM $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. 
+ reload_conv_objs= + gentop= + # reload_cmds runs $LD directly, so let us get rid of + # -Wl from whole_archive_flag_spec and hope we can get by with + # turning comma into space.. + wl= + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" + reload_conv_objs=$reload_objs\ `$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` + else + gentop="$output_objdir/${obj}x" + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + reload_conv_objs="$reload_objs $func_extract_archives_result" + fi + fi + + # If we're not building shared, we need to use non_pic_objs + test "$build_libtool_libs" != yes && libobjs="$non_pic_objects" + + # Create the old-style object. + reload_objs="$objs$old_deplibs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; /\.lib$/d; $lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test + + output="$obj" + func_execute_cmds "$reload_cmds" 'exit $?' + + # Exit if we aren't doing a library object file. + if test -z "$libobj"; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + fi + + if test "$build_libtool_libs" != yes; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + # $show "echo timestamp > $libobj" + # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? + exit $EXIT_SUCCESS + fi + + if test -n "$pic_flag" || test "$pic_mode" != default; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output="$libobj" + func_execute_cmds "$reload_cmds" 'exit $?' + fi + + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + ;; + + prog) + case $host in + *cygwin*) func_stripname '' '.exe' "$output" + output=$func_stripname_result.exe;; + esac + test -n "$vinfo" && \ + func_warning "\`-version-info' is ignored for programs" + + test -n "$release" && \ + func_warning "\`-release' is ignored for programs" + + test "$preload" = yes \ + && test "$dlopen_support" = unknown \ + && test "$dlopen_self" = unknown \ + && test "$dlopen_self_static" = unknown && \ + func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support." + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + case $host in + *-*-darwin*) + # Don't allow lazy linking, it breaks C++ global constructors + # But is supposedly fixed on 10.4 or later (yay!). 
+ if test "$tagname" = CXX ; then + case ${MACOSX_DEPLOYMENT_TARGET-10.0} in + 10.[0123]) + func_append compile_command " ${wl}-bind_at_load" + func_append finalize_command " ${wl}-bind_at_load" + ;; + esac + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $compile_deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $compile_deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + compile_deplibs="$new_libs" + + + func_append compile_command " $compile_deplibs" + func_append finalize_command " $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$libdir:"*) ;; + ::) dllsearchpath=$libdir;; + *) func_append dllsearchpath ":$libdir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath="$rpath" + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) func_append finalize_perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath="$rpath" + + if test -n "$libobjs" && test "$build_old_libs" = yes; then + # Transform all the library objects into standard objects. + compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + fi + + func_generate_dlsyms "$outputname" "@PROGRAM@" "no" + + # template prelinking step + if test -n "$prelink_cmds"; then + func_execute_cmds "$prelink_cmds" 'exit $?' + fi + + wrappers_required=yes + case $host in + *cegcc* | *mingw32ce*) + # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. + wrappers_required=no + ;; + *cygwin* | *mingw* ) + if test "$build_libtool_libs" != yes; then + wrappers_required=no + fi + ;; + *) + if test "$need_relink" = no || test "$build_libtool_libs" != yes; then + wrappers_required=no + fi + ;; + esac + if test "$wrappers_required" = no; then + # Replace the output file specification. + compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + link_command="$compile_command$compile_rpath" + + # We have no uninstalled library dependencies, so finalize right now. + exit_status=0 + func_show_eval "$link_command" 'exit_status=$?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Delete the generated files. + if test -f "$output_objdir/${outputname}S.${objext}"; then + func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"' + fi + + exit $exit_status + fi + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + func_append rpath "$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test "$no_install" = yes; then + # We don't need to create a wrapper script. + link_command="$compile_var$compile_command$compile_rpath" + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. 
+ $opt_dry_run || $RM $output + # Link the executable and exit + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + exit $EXIT_SUCCESS + fi + + if test "$hardcode_action" = relink; then + # Fast installation is not supported + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + + func_warning "this platform does not like uninstalled shared libraries" + func_warning "\`$output' will be relinked during installation" + else + if test "$fast_install" != no; then + link_command="$finalize_var$compile_command$finalize_rpath" + if test "$fast_install" = yes; then + relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` + else + # fast_install is set to needless + relink_command= + fi + else + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + fi + fi + + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname + + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output_objdir/$outputname" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Now create the wrapper script. + func_verbose "creating $output" + + # Quote the relink command for shipping. + if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_for_eval "$var_value" + relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + fi + done + relink_command="(cd `pwd`; $relink_command)" + relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + fi + + # Only actually do things if not in dry run mode. + $opt_dry_run || { + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) func_stripname '' '.exe' "$output" + output=$func_stripname_result ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) + exeext=.exe + func_stripname '' '.exe' "$outputname" + outputname=$func_stripname_result ;; + *) exeext= ;; + esac + case $host in + *cygwin* | *mingw* ) + func_dirname_and_basename "$output" "" "." 
+ output_name=$func_basename_result + output_path=$func_dirname_result + cwrappersource="$output_path/$objdir/lt-$output_name.c" + cwrapper="$output_path/$output_name.exe" + $RM $cwrappersource $cwrapper + trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 + + func_emit_cwrapperexe_src > $cwrappersource + + # The wrapper executable is built using the $host compiler, + # because it contains $host paths and files. If cross- + # compiling, it, like the target executable, must be + # executed on the $host or under an emulation environment. + $opt_dry_run || { + $LTCC $LTCFLAGS -o $cwrapper $cwrappersource + $STRIP $cwrapper + } + + # Now, create the wrapper script for func_source use: + func_ltwrapper_scriptname $cwrapper + $RM $func_ltwrapper_scriptname_result + trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 + $opt_dry_run || { + # note: this script will not be executed, so do not chmod. + if test "x$build" = "x$host" ; then + $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result + else + func_emit_wrapper no > $func_ltwrapper_scriptname_result + fi + } + ;; + * ) + $RM $output + trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 + + func_emit_wrapper no > $output + chmod +x $output + ;; + esac + } + exit $EXIT_SUCCESS + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + if test "$build_libtool_libs" = convenience; then + oldobjs="$libobjs_save $symfileobj" + addlibs="$convenience" + build_libtool_libs=no + else + if test "$build_libtool_libs" = module; then + oldobjs="$libobjs_save" + build_libtool_libs=no + else + oldobjs="$old_deplibs $non_pic_objects" + if test "$preload" = yes && test -f "$symfileobj"; then + func_append oldobjs " $symfileobj" + fi + fi + addlibs="$old_convenience" + fi + + if test -n "$addlibs"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $addlibs + func_append oldobjs " $func_extract_archives_result" + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then + cmds=$old_archive_from_new_cmds + else + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append oldobjs " $func_extract_archives_result" + fi + + # POSIX demands no paths to be encoded in archives. We have + # to avoid creating archives with duplicate basenames if we + # might have to extract them afterwards, e.g., when creating a + # static archive out of a convenience library, or when linking + # the entirety of a libtool archive into another (currently + # not supported by libtool). + if (for obj in $oldobjs + do + func_basename "$obj" + $ECHO "$func_basename_result" + done | sort | sort -uc >/dev/null 2>&1); then + : + else + echo "copying selected object files to avoid basename conflicts..." + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + func_mkdir_p "$gentop" + save_oldobjs=$oldobjs + oldobjs= + counter=1 + for obj in $save_oldobjs + do + func_basename "$obj" + objbase="$func_basename_result" + case " $oldobjs " in + " ") oldobjs=$obj ;; + *[\ /]"$objbase "*) + while :; do + # Make sure we don't pick an alternate name that also + # overlaps. 
+ newobj=lt$counter-$objbase + func_arith $counter + 1 + counter=$func_arith_result + case " $oldobjs " in + *[\ /]"$newobj "*) ;; + *) if test ! -f "$gentop/$newobj"; then break; fi ;; + esac + done + func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" + func_append oldobjs " $gentop/$newobj" + ;; + *) func_append oldobjs " $obj" ;; + esac + done + fi + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + eval cmds=\"$old_archive_cmds\" + + func_len " $cmds" + len=$func_len_result + if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + cmds=$old_archive_cmds + elif test -n "$archiver_list_spec"; then + func_verbose "using command file archive linking..." + for obj in $oldobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > $output_objdir/$libname.libcmd + func_to_tool_file "$output_objdir/$libname.libcmd" + oldobjs=" $archiver_list_spec$func_to_tool_file_result" + cmds=$old_archive_cmds + else + # the command line is too long to link in one step, link in parts + func_verbose "using piecewise archive linking..." + save_RANLIB=$RANLIB + RANLIB=: + objlist= + concat_cmds= + save_oldobjs=$oldobjs + oldobjs= + # Is there a better way of finding the last object in the list? + for obj in $save_oldobjs + do + last_oldobj=$obj + done + eval test_cmds=\"$old_archive_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + for obj in $save_oldobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + func_append objlist " $obj" + if test "$len" -lt "$max_cmd_len"; then + : + else + # the above command should be used before it gets too long + oldobjs=$objlist + if test "$obj" = "$last_oldobj" ; then + RANLIB=$save_RANLIB + fi + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\" + objlist= + len=$len0 + fi + done + RANLIB=$save_RANLIB + oldobjs=$objlist + if test "X$oldobjs" = "X" ; then + eval cmds=\"\$concat_cmds\" + else + eval cmds=\"\$concat_cmds~\$old_archive_cmds\" + fi + fi + fi + func_execute_cmds "$cmds" 'exit $?' + done + + test -n "$generated" && \ + func_show_eval "${RM}r$generated" + + # Now create the libtool archive. + case $output in + *.la) + old_library= + test "$build_old_libs" = yes && old_library="$libname.$libext" + func_verbose "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_for_eval "$var_value" + relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + if test "$hardcode_automatic" = yes ; then + relink_command= + fi + + # Only create the output if not a dry run. 
+ $opt_dry_run || { + for installed in no yes; do + if test "$installed" = yes; then + if test -z "$install_libdir"; then + break + fi + output="$output_objdir/$outputname"i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + func_basename "$deplib" + name="$func_basename_result" + func_resolve_sysroot "$deplib" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` + test -z "$libdir" && \ + func_fatal_error "\`$deplib' is not a valid libtool archive" + func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" + ;; + -L*) + func_stripname -L '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -L$func_replace_sysroot_result" + ;; + -R*) + func_stripname -R '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -R$func_replace_sysroot_result" + ;; + *) func_append newdependency_libs " $deplib" ;; + esac + done + dependency_libs="$newdependency_libs" + newdlfiles= + + for lib in $dlfiles; do + case $lib in + *.la) + func_basename "$lib" + name="$func_basename_result" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "\`$lib' is not a valid libtool archive" + func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" + ;; + *) func_append newdlfiles " $lib" ;; + esac + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + *.la) + # Only pass preopened files to the pseudo-archive (for + # eventual linking with the app. that links it) if we + # didn't already link the preopened objects directly into + # the library: + func_basename "$lib" + name="$func_basename_result" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "\`$lib' is not a valid libtool archive" + func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" + ;; + esac + done + dlprefiles="$newdlprefiles" + else + newdlfiles= + for lib in $dlfiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlfiles " $abs" + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlprefiles " $abs" + done + dlprefiles="$newdlprefiles" + fi + $RM $output + # place dlname in correct position for cygwin + # In fact, it would be nice if we could use this code for all target + # systems that can't hard-code library paths into their executables + # and that have no shared library path variable independent of PATH, + # but it turns out we can't easily determine that from inspecting + # libtool variables, so we have to hard-code the OSs to which it + # applies here; at the moment, that means platforms that use the PE + # object format with DLL files. See the long comment at the top of + # tests/bindir.at for full details. + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) + # If a -bindir argument was supplied, place the dll there. + if test "x$bindir" != x ; + then + func_relative_path "$install_libdir" "$bindir" + tdlname=$func_relative_path_result$dlname + else + # Otherwise fall back on heuristic. 
+ tdlname=../bin/$dlname + fi + ;; + esac + $ECHO > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Linker flags that can not go in dependency_libs. +inherited_linker_flags='$new_inherited_linker_flags' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Names of additional weak libraries provided by this library +weak_library_names='$weak_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Should we warn about portability when linking against -modules? +shouldnotlink=$module + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test "$installed" = no && test "$need_relink" = yes; then + $ECHO >> $output "\ +relink_command=\"$relink_command\"" + fi + done + } + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' + ;; + esac + exit $EXIT_SUCCESS +} + +{ test "$opt_mode" = link || test "$opt_mode" = relink; } && + func_mode_link ${1+"$@"} + + +# func_mode_uninstall arg... +func_mode_uninstall () +{ + $opt_debug + RM="$nonopt" + files= + rmforce= + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + for arg + do + case $arg in + -f) func_append RM " $arg"; rmforce=yes ;; + -*) func_append RM " $arg" ;; + *) func_append files " $arg" ;; + esac + done + + test -z "$RM" && \ + func_fatal_help "you must specify an RM program" + + rmdirs= + + for file in $files; do + func_dirname "$file" "" "." + dir="$func_dirname_result" + if test "X$dir" = X.; then + odir="$objdir" + else + odir="$dir/$objdir" + fi + func_basename "$file" + name="$func_basename_result" + test "$opt_mode" = uninstall && odir="$dir" + + # Remember odir for removal later, being careful to avoid duplicates + if test "$opt_mode" = clean; then + case " $rmdirs " in + *" $odir "*) ;; + *) func_append rmdirs " $odir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. + if { test -L "$file"; } >/dev/null 2>&1 || + { test -h "$file"; } >/dev/null 2>&1 || + test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif test "$rmforce" = yes; then + continue + fi + + rmfiles="$file" + + case $name in + *.la) + # Possibly a libtool archive, so verify it. + if func_lalib_p "$file"; then + func_source $dir/$name + + # Delete the libtool libraries and symlinks. + for n in $library_names; do + func_append rmfiles " $odir/$n" + done + test -n "$old_library" && func_append rmfiles " $odir/$old_library" + + case "$opt_mode" in + clean) + case " $library_names " in + *" $dlname "*) ;; + *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; + esac + test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" + ;; + uninstall) + if test -n "$library_names"; then + # Do each command in the postuninstall commands. 
+ func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' + fi + # FIXME: should reinstall the best remaining shared library. + ;; + esac + fi + ;; + + *.lo) + # Possibly a libtool object, so verify it. + if func_lalib_p "$file"; then + + # Read the .lo file + func_source $dir/$name + + # Add PIC object to the list of files to remove. + if test -n "$pic_object" && + test "$pic_object" != none; then + func_append rmfiles " $dir/$pic_object" + fi + + # Add non-PIC object to the list of files to remove. + if test -n "$non_pic_object" && + test "$non_pic_object" != none; then + func_append rmfiles " $dir/$non_pic_object" + fi + fi + ;; + + *) + if test "$opt_mode" = clean ; then + noexename=$name + case $file in + *.exe) + func_stripname '' '.exe' "$file" + file=$func_stripname_result + func_stripname '' '.exe' "$name" + noexename=$func_stripname_result + # $file with .exe has already been added to rmfiles, + # add $file without .exe + func_append rmfiles " $file" + ;; + esac + # Do a test to see if this is a libtool program. + if func_ltwrapper_p "$file"; then + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + relink_command= + func_source $func_ltwrapper_scriptname_result + func_append rmfiles " $func_ltwrapper_scriptname_result" + else + relink_command= + func_source $dir/$noexename + fi + + # note $name still contains .exe if it was in $file originally + # as does the version of $file that was added into $rmfiles + func_append rmfiles " $odir/$name $odir/${name}S.${objext}" + if test "$fast_install" = yes && test -n "$relink_command"; then + func_append rmfiles " $odir/lt-$name" + fi + if test "X$noexename" != "X$name" ; then + func_append rmfiles " $odir/lt-${noexename}.c" + fi + fi + fi + ;; + esac + func_show_eval "$RM $rmfiles" 'exit_status=1' + done + + # Try to remove the ${objdir}s in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + func_show_eval "rmdir $dir >/dev/null 2>&1" + fi + done + + exit $exit_status +} + +{ test "$opt_mode" = uninstall || test "$opt_mode" = clean; } && + func_mode_uninstall ${1+"$@"} + +test -z "$opt_mode" && { + help="$generic_help" + func_fatal_help "you must specify a MODE" +} + +test -z "$exec_cmd" && \ + func_fatal_help "invalid operation mode \`$opt_mode'" + +if test -n "$exec_cmd"; then + eval exec "$exec_cmd" + exit $EXIT_FAILURE +fi + +exit $exit_status + + +# The TAGs below are defined such that we never get into a situation +# in which we disable both kinds of libraries. Given conflicting +# choices, we go for a static library, that is the most portable, +# since we can't tell whether shared libraries were disabled because +# the user asked for that or because the platform doesn't support +# them. This is particularly important on AIX, because we don't +# support having both static and shared libraries enabled at the same +# time on that platform, so we default to a shared-only configuration. +# If a disable-shared tag is given, we'll fallback to a static-only +# configuration. But we'll never go from static-only to shared-only. 
+ +# ### BEGIN LIBTOOL TAG CONFIG: disable-shared +build_libtool_libs=no +build_old_libs=yes +# ### END LIBTOOL TAG CONFIG: disable-shared + +# ### BEGIN LIBTOOL TAG CONFIG: disable-static +build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` +# ### END LIBTOOL TAG CONFIG: disable-static + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: +# vi:sw=2 + diff --git a/vnet/missing b/vnet/missing new file mode 100755 index 00000000000..db98974ff5d --- /dev/null +++ b/vnet/missing @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2013-10-28.13; # UTC + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. + exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to <bug-automake@gnu.org>." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done. +test $st -eq 0 && exit 0 + +# Also exit now if we it failed (or wasn't found), and '--version' was +# passed; such an option is passed most likely to detect whether the +# program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. +if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. 
+ msg="missing on your system" +else + # Program was found and executed, but failed. Give up. + exit $st +fi + +perl_URL=http://www.perl.org/ +flex_URL=http://flex.sourceforge.net/ +gnu_software_URL=http://www.gnu.org/software + +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it." + program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps." + program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." + program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'autom4te' program to be rebuilt." + program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." + echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." + ;; + esac +} + +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). 
+exit $st + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/vnet/vnet/api_errno.h b/vnet/vnet/api_errno.h new file mode 100644 index 00000000000..b4b55354650 --- /dev/null +++ b/vnet/vnet/api_errno.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_api_errno_h +#define included_vnet_api_errno_h + +#define foreach_vnet_api_error \ +_(UNSPECIFIED, -1, "Unspecified Error") \ +_(INVALID_SW_IF_INDEX, -2, "Invalid sw_if_index") \ +_(NO_SUCH_FIB, -3, "No such FIB / VRF") \ +_(NO_SUCH_INNER_FIB, -4, "No such inner FIB / VRF") \ +_(NO_SUCH_LABEL, -5, "No such label") \ +_(NO_SUCH_ENTRY, -6, "No such entry") \ +_(INVALID_VALUE, -7, "Invalid value") \ +_(INVALID_VALUE_2, -8, "Invalid value #2") \ +_(UNIMPLEMENTED, -9, "Unimplemented") \ +_(INVALID_SW_IF_INDEX_2, -10, "Invalid sw_if_index #2") \ +_(SYSCALL_ERROR_1, -11, "System call error #1") \ +_(SYSCALL_ERROR_2, -12, "System call error #2") \ +_(SYSCALL_ERROR_3, -13, "System call error #3") \ +_(SYSCALL_ERROR_4, -14, "System call error #4") \ +_(SYSCALL_ERROR_5, -15, "System call error #5") \ +_(SYSCALL_ERROR_6, -16, "System call error #6") \ +_(SYSCALL_ERROR_7, -17, "System call error #7") \ +_(SYSCALL_ERROR_8, -18, "System call error #8") \ +_(SYSCALL_ERROR_9, -19, "System call error #9") \ +_(SYSCALL_ERROR_10, -20, "System call error #9") \ +_(FEATURE_DISABLED, -30, "Feature disabled by configuration") \ +_(INVALID_REGISTRATION, -31, "Invalid registration") \ +_(NEXT_HOP_NOT_IN_FIB, -50, "Next hop not in FIB") \ +_(UNKNOWN_DESTINATION, -51, "Unknown destination") \ +_(PREFIX_MATCHES_NEXT_HOP, -52, "Prefix matches next hop") \ +_(NEXT_HOP_NOT_FOUND_MP, -53, "Next hop not found (multipath)") \ +_(NO_MATCHING_INTERFACE, -54, "No matching interface for probe") \ +_(INVALID_VLAN, -55, "Invalid VLAN") \ +_(VLAN_ALREADY_EXISTS, -56, "VLAN subif already exists") \ +_(INVALID_SRC_ADDRESS, -57, "Invalid src address") \ +_(INVALID_DST_ADDRESS, -58, "Invalid dst address") \ +_(ADDRESS_LENGTH_MISMATCH, -59, "Address length mismatch") \ +_(ADDRESS_NOT_FOUND_FOR_INTERFACE, -60, "Address not found for interface") \ +_(ADDRESS_NOT_LINK_LOCAL, -61, "Address not link-local") \ +_(IP6_NOT_ENABLED, -62, "ip6 not enabled") \ +_(ADDRESS_MATCHES_INTERFACE_ADDRESS, -63, "Address matches interface address") \ +_(IN_PROGRESS, 10, "Operation in progress") \ +_(NO_SUCH_NODE, -63, "No such graph node") \ +_(NO_SUCH_NODE2, -64, "No such graph node #2") \ +_(NO_SUCH_TABLE, -65, "No such table") \ +_(NO_SUCH_TABLE2, -66, "No such table #2") \ +_(NO_SUCH_TABLE3, -67, "No such table #3") \ +_(SUBIF_ALREADY_EXISTS, -68, "Subinterface already exists") \ +_(SUBIF_CREATE_FAILED, -69, "Subinterface creation failed") \ +_(INVALID_MEMORY_SIZE, -70, "Invalid memory size requested") \ 
+_(INVALID_INTERFACE, -71, "Invalid interface") \ +_(INVALID_VLAN_TAG_COUNT, -72, "Invalid number of tags for requested operation") \ +_(INVALID_ARGUMENT, -73, "Invalid argument") \ +_(UNEXPECTED_INTF_STATE, -74, "Unexpected interface state") \ +_(TUNNEL_EXIST, -75, "Tunnel already exists") \ +_(INVALID_DECAP_NEXT, -76, "Invalid decap-next") \ +_(RESPONSE_NOT_READY, -77, "Response not ready") \ +_(NOT_CONNECTED, -78, "Not connected to the data plane") + +typedef enum { +#define _(a,b,c) VNET_API_ERROR_##a = (b), + foreach_vnet_api_error +#undef _ + VNET_API_N_ERROR, +} vnet_api_error_t; + +#endif /* included_vnet_api_errno_h */ diff --git a/vnet/vnet/buffer.h b/vnet/vnet/buffer.h new file mode 100644 index 00000000000..9cbb402bd60 --- /dev/null +++ b/vnet/vnet/buffer.h @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * vnet/buffer.h: vnet buffer flags + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_buffer_h +#define included_vnet_buffer_h + +#include <vlib/vlib.h> + +/* VLIB buffer flags for ip4/ip6 packets. Set by input interfaces for ip4/ip6 + tcp/udp packets with hardware computed checksums. 
*/ +#define LOG2_IP_BUFFER_L4_CHECKSUM_COMPUTED LOG2_VLIB_BUFFER_FLAG_USER(1) +#define LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT LOG2_VLIB_BUFFER_FLAG_USER(2) +#define IP_BUFFER_L4_CHECKSUM_COMPUTED (1 << LOG2_IP_BUFFER_L4_CHECKSUM_COMPUTED) +#define IP_BUFFER_L4_CHECKSUM_CORRECT (1 << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT) + +#define LOG2_HGSHM_BUFFER_USER_INDEX_VALID LOG2_VLIB_BUFFER_FLAG_USER(3) +#define VNET_HGSHM_BUFFER_USER_INDEX_VALID (1 << LOG2_HGSHM_BUFFER_USER_INDEX_VALID) + +#define foreach_buffer_opaque_union_subtype \ +_(ethernet) \ +_(ip) \ +_(mcast) \ +_(lb) \ +_(dlb) \ +_(swt) \ +_(l2) \ +_(l2t) \ +_(hgshm) \ +_(gre) \ +_(l2_classify) \ +_(io_handoff) \ +_(policer) \ +_(output_features) + +/* + * vnet stack buffer opaque array overlay structure. + * The vnet_buffer_opaque_t *must* be the same size as the + * vlib_buffer_t "opaque" structure member, 32 bytes. + * + * When adding a union type, please add a stanza to + * foreach_buffer_opaque_union_subtype (directly above). + * Code in vnet_interface_init(...) verifies the size + * of the union, and will announce any deviations in an + * impossible-to-miss manner. + */ +typedef struct { + u32 sw_if_index[VLIB_N_RX_TX]; + + union { + /* Ethernet. */ + struct { + /* Saved value of current header by ethernet-input. */ + i32 start_of_ethernet_header; + } ethernet; + + /* IP4/6 buffer opaque. */ + struct { + /* Adjacency from destination IP address lookup [VLIB_TX]. + Adjacency from source IP address lookup [VLIB_RX]. + This gets set to ~0 until source lookup is performed. */ + u32 adj_index[VLIB_N_RX_TX]; + + union { + struct { + /* Current configuration index. */ + u32 current_config_index; + + /* Flow hash value for this packet computed from IP src/dst address + protocol and ports. */ + u32 flow_hash; + + /* next protocol */ + u32 save_protocol; + }; + + /* Alternate used for local TCP packets. 
*/ + struct { + u32 listener_index; + + u32 established_connection_index; + + u32 mini_connection_index; + } tcp; + }; + } ip; + + /* Multicast replication */ + struct { + u32 pad[3]; + u32 mcast_group_index; + u32 mcast_current_index; + u32 original_free_list_index; + } mcast; + + /* ipv6 shallow-pkt-inspection load-balancer, only valid there */ + struct { + u8 lb_disable; + u8 user_to_network; + u8 was_injected; + u32 bucket_id; + } lb; + /* ipv4 DPI load-balancer, only valid there */ + struct { + u8 lb_disable; + u8 user_to_network; + u32 session_index; + } dlb; + + /* ip4-in-ip6 softwire termination, only valid there */ + struct { + u8 swt_disable; + u32 mapping_index; + } swt; + + /* l2 bridging path, only valid there */ + struct { + u32 feature_bitmap; + u16 bd_index; // bridge-domain index + u8 l2_len; // ethernet header length + u8 shg; // split-horizon group + } l2; + + /* l2tpv3 softwire encap, only valid there */ + struct { + u32 pad[4]; /* do not overlay w/ ip.adj_index[0,1] */ + u8 next_index; + u32 session_index; + } l2t; + + /* hgshm, valid if packet sent through iface */ + struct { + u32 pad[8 -VLIB_N_RX_TX -1]; /* to end of opaque */ + u32 user_index; /* client id borrowing buffer */ + } hgshm; + + struct { + u32 src, dst; + } gre; + + /* L2 classify */ + struct { + u64 pad; + u32 opaque_index; + u32 table_index; + u64 hash; + } l2_classify; + + /* IO - worker thread handoff */ + struct { + u32 next_index; + } io_handoff; + + /* vnet policer */ + struct { + u32 pad[8 -VLIB_N_RX_TX -1]; /* to end of opaque */ + u32 index; + } policer; + + /* interface output features */ + struct { + u32 ipsec_spd_index; + u32 ipsec_sad_index; + u32 unused[3]; + u32 bitmap; + } output_features; + + /* vcgn udp inside input, only valid there */ + struct { + /* This part forms context of the packet. The structure should be + * exactly same as spp_ctx_t. Also this should be the first + * element of this vcgn_uii structure. 
+ */ + /****** BEGIN spp_ctx_t section ***********************/ + union { /* Roddick specific */ + u32 roddick_info; + struct _tx_pkt_info { /* Used by PI to PI communication for TX */ + u32 uidb_index:16; /* uidb_index to transmit */ + u32 packet_type:2; /* 1-IPv4, 2-Ipv6, - 0,3 - Unused */ + u32 ipv4_defrag:1; /* 0 - Normal, 1 - update first + * segment size + * (set by 6rd defrag node) + */ + + u32 dst_ip_port_idx:4;/* Index to dst_ip_port_table */ + u32 from_node:4; + u32 calc_chksum:1; + u32 reserved:4; + } tx; + struct _rx_pkt_info { /* Used by PD / PI communication */ + u32 uidb_index:16; /* uidb_index received in packet */ + u32 packet_type:2; /* 1-IPv4, 2-Ipv6, - 0,3 - Unused */ + u32 icmp_type:1; /* 0-ICMP query type, 1-ICMP error type */ + u32 protocol_type:2; /* 1-TCP, 2-UDP, 3-ICMP, 0 - Unused */ + u32 ipv4_defrag:1; /* 0 - Normal, 1 - update first + * segment size + * (set by 6rd defrag node) + */ + + u32 direction:1; /* 0-Outside, 1-Inside */ + u32 frag:1; /*IP fragment-1, Otherwise-0*/ + u32 option:1; /* 0-No IP option (v4) present, non-fragHdr + * option hdr present (v6) + */ + u32 df_bit:1; /* IPv4 DF bit copied here */ + u32 reserved1:6; + } rx; + } ru; + /****** END spp_ctx_t section ***********************/ + + union { + struct { + u32 ipv4; + u16 port; + u16 vrf; //bit0-13:i/f, bit14-15:protocol + } k; + + u64 key64; + } key; + + u32 bucket; + + u16 ovrf; /* Exit interface */ + u8 frag_pkt; + u8 vcgn_unused1; + } vcgn_uii; + + /* MAP */ + struct { + u16 mtu; + } map; + + /* MAP-T */ + struct { + u32 map_domain_index; + struct { + u32 saddr, daddr; + u16 frag_offset; //Fragmentation header offset + u16 l4_offset; //L4 header overall offset + u8 l4_protocol; //The final protocol number + } v6; //Used by ip6_map_t only + u16 checksum_offset; //L4 checksum overall offset + u16 mtu; //Exit MTU + } map_t; + + /* IP Fragmentation */ + struct { + u16 header_offset; + u16 mtu; + u8 next_index; + u8 flags; //See ip_frag.h + } ip_frag; + + u32 unused[6]; + }; +} vnet_buffer_opaque_t; + +#define vnet_buffer(b) ((vnet_buffer_opaque_t *) (b)->opaque) + +/* Full cache line (64 bytes) of additional space */ +typedef struct { + union { + }; +} vnet_buffer_opaque2_t; + + + +#endif /* included_vnet_buffer_h */ diff --git a/vnet/vnet/classify/README b/vnet/vnet/classify/README new file mode 100644 index 00000000000..1ef5ab5ac34 --- /dev/null +++ b/vnet/vnet/classify/README @@ -0,0 +1,180 @@ +=== vnet classifier theory of operation === + +The vnet classifier trades off simplicity and perf / scale +characteristics. At a certain level, it's a dumb robot. Given an +incoming packet, search an ordered list of (mask, match) tables. If +the classifier finds a matching entry, take the indicated action. If +not, take a last-resort action. + +We use the MMX-unit to match or hash 16 octets at a time. For hardware +backward compatibility, the code does not [currently] use 256-bit +(32-octet) vector instructions. + +Effective use of the classifier centers around building table lists +which "hit" as soon as practicable. In many cases, established +sessions hit in the first table. In this mode of operation, the +classifier easily processes multiple MPPS / core - even with millions +of sessions in the data base. Searching 357 tables on a regular basis +will neatly solve the halting problem. + +==== Basic operation ==== + +The classifier mask-and-match operation proceeds as follows. Given a +starting classifier table index, lay hands on the indicated mask +vector. 
When building tables, we arrange for the mask to obey +mmx-unit (16-octet) alignment. + +We know that the first octet of packet data starts on a cache-line +boundary. Further, it's reasonably likely that folks won't want to use +the generalized classifier on the L2 header; preferring to decode the +Ethertype manually. That scheme makes it easy to select among ip4 / +ip6 / MPLS, etc. classifier table sets. + +A no-vlan-tag L2 header is 14 octets long. A typical ipv4 header +begins with the octets 0x4500: version=4, header_length=5, DSCP=0, +ECN=0. If one doesn't intend to classify on (DSCP, ECN) - the typical +case - we program the classifier to skip the first 16-octet vector. + +To classify untagged ipv4 packets on source address, we program the +classifier to skip one vector, and mask-and-match one vector. + +The basic match-and-match operation looks like this: + + switch (t->match_n_vectors) + { + case 1: + result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; + break; + + case 2: + result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; + result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; + break; + + <etc> + } + + result_mask = u32x4_zero_byte_mask (result); + if (result_mask == 0xffff) + return (v); + +Net of setup, it costs a couple of clock cycles to mask-and-match 16 +octets. + +At the risk of belaboring an obvious point, the control-plane +'''must''' pay attention to detail. When skipping one (or more) +vectors, masks and matches must reflect that decision. See +.../vnet/vnet/classify/vnet_classify.c:unformat_classify_[mask|match]. Note +that vec_validate (xxx, 13) creates a 14-element vector. + +==== Creating a classifier table ==== + +To create a new classifier table via the control-plane API, send a +"classify_add_del_table" message. The underlying action routine, +vnet_classify_add_del_table(...), is located in +.../vnet/vnet/classify/vnet_classify.c, and has the following +prototype: + + int vnet_classify_add_del_table (vnet_classify_main_t * cm, + u8 * mask, + u32 nbuckets, + u32 memory_size, + u32 skip, + u32 match, + u32 next_table_index, + u32 miss_next_index, + u32 * table_index, + int is_add) + +Pass cm = &vnet_classify_main if calling this routine directly. Mask, +skip(_n_vectors) and match(_n_vectors) are as described above. Mask +need not be aligned, but it must be match*16 octets in length. To +avoid having your head explode, be absolutely certain that '''only''' +the bits you intend to match on are set. + +The classifier uses thread-safe, no-reader-locking-required +bounded-index extensible hashing. Nbuckets is the [fixed] size of the +hash bucket vector. The algorithm works in constant time regardless of +hash collisions, but wastes space when the bucket array is too +small. A good rule of thumb: let nbuckets = approximate number of +entries expected. + +At a signficant cost in complexity, it would be possible to resize the +bucket array dynamically. We have no plans to implement that function. + +Each classifier table has its own clib mheap memory allocation +arena. To pick the memory_size parameter, note that each classifier +table entry needs 16*(1 + match_n_vectors) bytes. Within reason, aim a +bit high. Clib mheap memory uses o/s level virtual memory - not wired +or hugetlb memory - so it's best not to scrimp on size. + +The "next_table_index" parameter is as described: the pool index in +vnet_classify_main.tables of the next table to search. Code ~0 to +indicate the end of the table list. 0 is a valid table index! 
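As a minimal illustrative sketch (not part of this commit), a direct call to
vnet_classify_add_del_table for the untagged-ipv4-source-address case described
above might look as follows; the nbuckets, memory_size, and byte offsets are
assumptions chosen for the example, and miss_next_index is discussed below:

    u8 mask[16] = { 0 };
    u32 table_index = ~0;
    int rv;

    /* With skip = 1, the single match vector covers packet octets 16..31.
       Assuming an untagged ethernet + ipv4 packet, the ipv4 source address
       occupies packet octets 26..29, i.e. bytes 10..13 of the match vector. */
    memset (mask + 10, 0xff, 4);

    rv = vnet_classify_add_del_table (&vnet_classify_main,
                                      mask,
                                      1024    /* nbuckets ~= expected entries */,
                                      2<<20   /* 2 MB mheap, aim a bit high */,
                                      1       /* skip_n_vectors */,
                                      1       /* match_n_vectors */,
                                      ~0      /* next_table_index: end of list */,
                                      ~0      /* miss_next_index, see below */,
                                      &table_index,
                                      1       /* is_add */);
    /* On success (rv == 0), table_index names the new table. */
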
+ +We often create classification tables in reverse order - +last-table-searched to first-table-searched - so we can easily set +this parameter. Of course, one can manually adjust the data structure +after-the-fact. + +Specific classifier client nodes - for example, +.../vnet/vnet/classify/ip_classify.c - interpret the "miss_next_index" +parameter as a vpp graph-node next index. When packet classification +fails to produce a match, ip_classify_inline sends packets to the +indicated disposition. A classifier application might program this +parameter to send packets which don't match an existing session to a +"first-sign-of-life, create-new-session" node. + +Finally, the is_add parameter indicates whether to add or delete the +indicated table. The delete case implicitly terminates all sessions +with extreme prejudice, by freeing the specified clib mheap. + +==== Creating a classifier session ==== + +To create a new classifier session via the control-plane API, send a +"classify_add_del_session" message. The underlying action routine, +vnet_classify_add_del_session(...), is located in +.../vnet/vnet/classify/vnet_classify.c, and has the following +prototype: + +int vnet_classify_add_del_session (vnet_classify_main_t * cm, + u32 table_index, + u8 * match, + u32 hit_next_index, + u32 opaque_index, + i32 advance, + int is_add) + +Pass cm = &vnet_classify_main if calling this routine directly. Table +index specifies the table which receives the new session / contains +the session to delete depending on is_add. + +Match is the key for the indicated session. It need not be aligned, +but it must be table->match_n_vectors*16 octets in length. As a +courtesy, vnet_classify_add_del_session applies the table's mask to +the stored key-value. In this way, one can create a session by passing +unmasked (packet_data + offset) as the "match" parameter, and end up +with unconfusing session keys. + +Specific classifier client nodes - for example, +.../vnet/vnet/classify/ip_classify.c - interpret the per-session +hit_next_index parameter as a vpp graph-node next index. When packet +classification produces a match, ip_classify_inline sends packets to +the indicated disposition. + +ip4/6_classify place the per-session opaque_index parameter into +vnet_buffer(b)->l2_classify.opaque_index; a slight misnomer, but +anyhow classifier applications can send session-hit packets to +specific graph nodes, with useful values in buffer metadata. Depending +on the required semantics, we send known-session traffic to a certain +node, with e.g. a session pool index in buffer metadata. It's totally +up to the control-plane and the specific use-case. + +Finally, nodes such as ip4/6-classify apply the advance parameter as a +[signed!] argument to vlib_buffer_advance(...); to "consume" a +networking layer. Example: if we classify incoming tunneled IP packets +by (inner) source/dest address and source/dest port, we might choose +to decapsulate and reencapsulate the inner packet. In such a case, +program the advance parameter to perform the tunnel decapsulation, and +program next_index to send traffic to a node which uses +e.g. opaque_index to output traffic on a specific tunnel interface. diff --git a/vnet/vnet/classify/input_acl.c b/vnet/vnet/classify/input_acl.c new file mode 100644 index 00000000000..2c533d1170c --- /dev/null +++ b/vnet/vnet/classify/input_acl.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/ip/ip.h> +#include <vnet/classify/vnet_classify.h> +#include <vnet/classify/input_acl.h> + +input_acl_main_t input_acl_main; + +static int +vnet_inacl_ip_feature_enable (vlib_main_t * vnm, + input_acl_main_t *am, + u32 sw_if_index, + input_acl_table_id_t tid, + int feature_enable) +{ + + if (tid == INPUT_ACL_TABLE_L2) + { + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ACL, + feature_enable); + } + else + { /* IP[46] */ + ip_lookup_main_t * lm; + ip_config_main_t * ipcm; + ip4_rx_feature_type_t ftype; + u32 ci; + + if (tid == INPUT_ACL_TABLE_IP4) + { + lm = &ip4_main.lookup_main; + ftype = IP4_RX_FEATURE_CHECK_ACCESS; + } + else + { + lm = &ip6_main.lookup_main; + ftype = IP6_RX_FEATURE_CHECK_ACCESS; + } + + ipcm = &lm->rx_config_mains[VNET_UNICAST]; + + ci = ipcm->config_index_by_sw_if_index[sw_if_index]; + ci = ((feature_enable) + ? vnet_config_add_feature + : vnet_config_del_feature) + (vnm, &ipcm->config_main, ci, ftype, + /* config data */ 0, + /* # bytes of config data */ 0); + + ipcm->config_index_by_sw_if_index[sw_if_index] = ci; + am->vnet_config_main[tid] = &ipcm->config_main; + } + + return 0; +} + +int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 ip4_table_index, + u32 ip6_table_index, + u32 l2_table_index, u32 is_add) +{ + input_acl_main_t * am = &input_acl_main; + vnet_classify_main_t * vcm = am->vnet_classify_main; + u32 acl[INPUT_ACL_N_TABLES] = {ip4_table_index, ip6_table_index, + l2_table_index}; + u32 ti; + + /* Assume that we've validated sw_if_index in the API layer */ + + for (ti = 0; ti < INPUT_ACL_N_TABLES; ti++) + { + if (acl[ti] == ~0) + continue; + + if (pool_is_free_index (vcm->tables, acl[ti])) + return VNET_API_ERROR_NO_SUCH_TABLE; + + vec_validate_init_empty + (am->classify_table_index_by_sw_if_index[ti], sw_if_index, ~0); + + /* Reject any DEL operation with wrong sw_if_index */ + if (!is_add && + (acl[ti] != am->classify_table_index_by_sw_if_index[ti][sw_if_index])) + { + clib_warning ("Non-existent intf_idx=%d with table_index=%d for delete", + sw_if_index, acl[ti]); + return VNET_API_ERROR_NO_SUCH_TABLE; + } + + /* Return ok on ADD operaton if feature is already enabled */ + if (is_add && + am->classify_table_index_by_sw_if_index[ti][sw_if_index] != ~0) + return 0; + + vnet_inacl_ip_feature_enable (vm, am, sw_if_index, ti, is_add); + + if (is_add) + am->classify_table_index_by_sw_if_index[ti][sw_if_index] = acl[ti]; + else + am->classify_table_index_by_sw_if_index[ti][sw_if_index] = ~0; + } + + return 0; +} + +static clib_error_t * +set_input_acl_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 sw_if_index = ~0; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + u32 l2_table_index = ~0; + u32 is_add = 1; + u32 idx_cnt = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
+ vnm, &sw_if_index)) + ; + else if (unformat (input, "ip4-table %d", &ip4_table_index)) + idx_cnt++; + else if (unformat (input, "ip6-table %d", &ip6_table_index)) + idx_cnt++; + else if (unformat (input, "l2-table %d", &l2_table_index)) + idx_cnt++; + else if (unformat (input, "del")) + is_add = 0; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Interface must be specified."); + + if (!idx_cnt) + return clib_error_return (0, "Table index should be specified."); + + if (idx_cnt > 1) + return clib_error_return (0, "Only one table index per API is allowed."); + + rv = vnet_set_input_acl_intfc (vm, sw_if_index, ip4_table_index, + ip6_table_index, l2_table_index, is_add); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_MATCHING_INTERFACE: + return clib_error_return (0, "No such interface"); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "No such classifier table"); + } + return 0; +} + +/* + * Configure interface to enable/disble input ACL feature: + * intfc - interface name to be configured as input ACL + * Ip4-table <index> [del] - enable/disable IP4 input ACL + * Ip6-table <index> [del] - enable/disable IP6 input ACL + * l2-table <index> [del] - enable/disable Layer2 input ACL + * + * Note: Only one table index per API call is allowed. + * + */ +VLIB_CLI_COMMAND (set_input_acl_command, static) = { + .path = "set interface input acl", + .short_help = + "set interface input acl intfc <int> [ip4-table <index>]\n" + " [ip6-table <index>] [l2-table <index>] [del]", + .function = set_input_acl_command_fn, +}; + +clib_error_t *input_acl_init (vlib_main_t *vm) +{ + input_acl_main_t * am = &input_acl_main; + clib_error_t * error = 0; + + if ((error = vlib_call_init_function (vm, ip_inacl_init))) + return error; + + am->vlib_main = vm; + am->vnet_main = vnet_get_main(); + am->vnet_classify_main = &vnet_classify_main; + + return 0; +} + +VLIB_INIT_FUNCTION (input_acl_init); + +uword unformat_acl_type (unformat_input_t * input, va_list * args) +{ + u32 * acl_type = va_arg (*args, u32 *); + u32 tid = INPUT_ACL_N_TABLES; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "ip4")) + tid = INPUT_ACL_TABLE_IP4; + else if (unformat (input, "ip6")) + tid = INPUT_ACL_TABLE_IP6; + else if (unformat (input, "l2")) + tid = INPUT_ACL_TABLE_L2; + else + break; + } + + *acl_type = tid; + return 1; +} + +u8 * format_vnet_inacl_info (u8 * s, va_list * va) +{ + input_acl_main_t * am = va_arg (*va, input_acl_main_t *); + int sw_if_idx = va_arg (*va, int); + u32 tid = va_arg (*va, u32); + + if (tid == ~0) + { + s = format (s, "%10s%20s\t\t%s", "Intfc idx", "Classify table", + "Interface name"); + return s; + } + + s = format (s, "%10d%20d\t\t%U", sw_if_idx, tid, + format_vnet_sw_if_index_name, am->vnet_main, sw_if_idx); + + return s; +} + +static clib_error_t * +show_inacl_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + input_acl_main_t * am = &input_acl_main; + u32 type = INPUT_ACL_N_TABLES; + int i; + u32 * vec_tbl; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "type %U", unformat_acl_type, &type)) + ; + else + break; + } + + if (type == INPUT_ACL_N_TABLES) + return clib_error_return (0, "Invalid input ACL table type."); + + vec_tbl = am->classify_table_index_by_sw_if_index[type]; + + if (vec_len(vec_tbl)) + vlib_cli_output (vm, "%U", format_vnet_inacl_info, am, ~0 /* hdr */, ~0); + else + vlib_cli_output (vm, "No input 
ACL tables configured"); + + for (i = 0; i < vec_len (vec_tbl); i++) + { + if (vec_elt(vec_tbl, i) == ~0) + continue; + + vlib_cli_output (vm, "%U", format_vnet_inacl_info, + am, i, vec_elt(vec_tbl, i)); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_inacl_command, static) = { + .path = "show inacl", + .short_help = "show inacl type [ip4|ip6|l2]", + .function = show_inacl_command_fn, +}; diff --git a/vnet/vnet/classify/input_acl.h b/vnet/vnet/classify/input_acl.h new file mode 100644 index 00000000000..7ffc189f053 --- /dev/null +++ b/vnet/vnet/classify/input_acl.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_vnet_input_acl_h__ +#define __included_vnet_input_acl_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/classify/vnet_classify.h> + +typedef enum { + INPUT_ACL_TABLE_IP4, + INPUT_ACL_TABLE_IP6, + INPUT_ACL_TABLE_L2, + INPUT_ACL_N_TABLES, +} input_acl_table_id_t; + +typedef enum { + ACL_NEXT_INDEX_DENY, + ACL_NEXT_INDEX_N_NEXT, +} acl_next_index_t; + +typedef struct { + + /* classifier table vectors */ + u32 * classify_table_index_by_sw_if_index [INPUT_ACL_N_TABLES]; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + vnet_classify_main_t * vnet_classify_main; + vnet_config_main_t * vnet_config_main [INPUT_ACL_N_TABLES]; +} input_acl_main_t; + +extern input_acl_main_t input_acl_main; + +int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 ip4_table_index, + u32 ip6_table_index, + u32 l2_table_index, u32 is_add); + +#endif /* __included_vnet_input_acl_h__ */ diff --git a/vnet/vnet/classify/ip_classify.c b/vnet/vnet/classify/ip_classify.c new file mode 100644 index 00000000000..c922608547c --- /dev/null +++ b/vnet/vnet/classify/ip_classify.c @@ -0,0 +1,384 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ +#include <vnet/classify/vnet_classify.h> + +typedef struct { + u32 next_index; + u32 table_index; + u32 entry_index; +} ip_classify_trace_t; + +/* packet trace format function */ +static u8 * format_ip_classify_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip_classify_trace_t * t = va_arg (*args, ip_classify_trace_t *); + + s = format (s, "IP_CLASSIFY: next_index %d, table %d, entry %d", + t->next_index, t->table_index, t->entry_index); + return s; +} + +vlib_node_registration_t ip4_classify_node; +vlib_node_registration_t ip6_classify_node; + +#define foreach_ip_classify_error \ +_(MISS, "Classify misses") \ +_(HIT, "Classify hits") \ +_(CHAIN_HIT, "Classify hits after chain walk") + +typedef enum { +#define _(sym,str) IP_CLASSIFY_ERROR_##sym, + foreach_ip_classify_error +#undef _ + IP_CLASSIFY_N_ERROR, +} ip_classify_error_t; + +static char * ip_classify_error_strings[] = { +#define _(sym,string) string, + foreach_ip_classify_error +#undef _ +}; + +static inline uword +ip_classify_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_ip4) +{ + u32 n_left_from, * from, * to_next; + ip_lookup_next_t next_index; + vnet_classify_main_t * vcm = &vnet_classify_main; + ip_lookup_main_t * lm; + f64 now = vlib_time_now (vm); + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + + if (is_ip4) + lm = &ip4_main.lookup_main; + else + lm = &ip6_main.lookup_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + /* First pass: compute hashes */ + + while (n_left_from > 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, bi1; + u8 * h0, * h1; + u32 adj_index0, adj_index1; + ip_adjacency_t * adj0, * adj1; + u32 table_index0, table_index1; + vnet_classify_table_t * t0, * t1; + + /* prefetch next iteration */ + { + vlib_buffer_t * p1, * p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = b1->data; + + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = ip_get_adjacency (lm, adj_index0); + table_index0 = adj0->classify_table_index; + + adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; + adj1 = ip_get_adjacency (lm, adj_index1); + table_index1 = adj1->classify_table_index; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + vnet_buffer(b1)->l2_classify.hash = + vnet_classify_hash_packet (t1, (u8 *) h1); + + vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + + vnet_buffer(b1)->l2_classify.table_index = table_index1; + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t * b0; + u32 bi0; + u8 * h0; + u32 adj_index0; + ip_adjacency_t * adj0; + u32 table_index0; + vnet_classify_table_t * t0; + + bi0 = from[0]; + b0 = 
vlib_get_buffer (vm, bi0); + h0 = b0->data; + + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = ip_get_adjacency (lm, adj_index0); + table_index0 = adj0->classify_table_index; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = IP_LOOKUP_NEXT_MISS; + u32 table_index0; + vnet_classify_table_t * t0; + vnet_classify_entry_t * e0; + u64 hash0; + u8 * h0; + + /* Stride 3 seems to work best */ + if (PREDICT_TRUE (n_left_from > 3)) + { + vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]); + vnet_classify_table_t * tp1; + u32 table_index1; + u64 phash1; + + table_index1 = vnet_buffer(p1)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index1 != ~0)) + { + tp1 = pool_elt_at_index (vcm->tables, table_index1); + phash1 = vnet_buffer(p1)->l2_classify.hash; + vnet_classify_prefetch_entry (tp1, phash1); + } + } + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + table_index0 = vnet_buffer(b0)->l2_classify.table_index; + e0 = 0; + t0 = 0; + vnet_buffer(b0)->l2_classify.opaque_index = ~0; + + if (PREDICT_TRUE(table_index0 != ~0)) + { + hash0 = vnet_buffer(b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, + now); + if (e0) + { + vnet_buffer(b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < IP_LOOKUP_N_NEXT)? + e0->next_index:next0; + hits++; + } + else + { + while (1) + { + if (t0->next_table_index != ~0) + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + else + { + next0 = (t0->miss_next_index < IP_LOOKUP_N_NEXT)? + t0->miss_next_index:next0; + misses++; + break; + } + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry + (t0, (u8 *) h0, hash0, now); + if (e0) + { + vnet_buffer(b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < IP_LOOKUP_N_NEXT)? + e0->next_index:next0; + hits++; + chain_hits++; + break; + } + } + } + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->table_index = t0 ? t0 - vcm->tables : ~0; + t->entry_index = e0 ? 
e0 - t0->entries : ~0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + IP_CLASSIFY_ERROR_MISS, + misses); + vlib_node_increment_counter (vm, node->node_index, + IP_CLASSIFY_ERROR_HIT, + hits); + vlib_node_increment_counter (vm, node->node_index, + IP_CLASSIFY_ERROR_CHAIN_HIT, + chain_hits); + return frame->n_vectors; +} + +static uword +ip4_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip_classify_inline (vm, node, frame, 1 /* is_ip4 */); +} + + +VLIB_REGISTER_NODE (ip4_classify_node) = { + .function = ip4_classify, + .name = "ip4-classify", + .vector_size = sizeof (u32), + .format_trace = format_ip_classify_trace, + .n_errors = ARRAY_LEN(ip_classify_error_strings), + .error_strings = ip_classify_error_strings, + + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip4-miss", + [IP_LOOKUP_NEXT_DROP] = "ip4-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", + [IP_LOOKUP_NEXT_ARP] = "ip4-arp", + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", /* probably not... */ + [IP_LOOKUP_NEXT_MAP] = "ip4-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd", + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", + }, +}; + +static uword +ip6_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip_classify_inline (vm, node, frame, 0 /* is_ip4 */); +} + + +VLIB_REGISTER_NODE (ip6_classify_node) = { + .function = ip6_classify, + .name = "ip6-classify", + .vector_size = sizeof (u32), + .format_trace = format_ip_classify_trace, + .n_errors = ARRAY_LEN(ip_classify_error_strings), + .error_strings = ip_classify_error_strings, + + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip6-miss", + [IP_LOOKUP_NEXT_DROP] = "ip6-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", + [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", + [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", /* probably not... */ + [IP_LOOKUP_NEXT_MAP] = "ip6-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd", + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", + }, +}; + +static clib_error_t * +ip_classify_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (ip_classify_init); diff --git a/vnet/vnet/classify/vnet_classify.c b/vnet/vnet/classify/vnet_classify.c new file mode 100644 index 00000000000..43acb024033 --- /dev/null +++ b/vnet/vnet/classify/vnet_classify.c @@ -0,0 +1,1895 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/classify/vnet_classify.h> +#include <vnet/classify/input_acl.h> +#include <vnet/ip/ip.h> +#include <vnet/api_errno.h> /* for API error numbers */ +#include <vnet/l2/l2_classify.h> /* for L2_CLASSIFY_NEXT_xxx */ + +#if VALIDATION_SCAFFOLDING +/* Validation scaffolding */ +void mv (vnet_classify_table_t * t) +{ + void * oldheap; + + oldheap = clib_mem_set_heap (t->mheap); + clib_mem_validate(); + clib_mem_set_heap (oldheap); +} + +void rogue (vnet_classify_table_t * t) +{ + int i, j, k; + vnet_classify_entry_t * v, * save_v; + u32 active_elements = 0; + vnet_classify_bucket_t * b; + + for (i = 0; i < t->nbuckets; i++) + { + b = &t->buckets [i]; + if (b->offset == 0) + continue; + save_v = vnet_classify_get_entry (t, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < t->entries_per_page; k++) + { + v = vnet_classify_entry_at_index + (t, save_v, j*t->entries_per_page + k); + + if (vnet_classify_entry_is_busy (v)) + active_elements++; + } + } + } + + if (active_elements != t->active_elements) + clib_warning ("found %u expected %u elts", active_elements, + t->active_elements); +} +#else +void mv (vnet_classify_table_t * t) { } +void rogue (vnet_classify_table_t * t) { } +#endif + +vnet_classify_table_t * +vnet_classify_new_table (vnet_classify_main_t *cm, + u8 * mask, u32 nbuckets, u32 memory_size, + u32 skip_n_vectors, + u32 match_n_vectors) +{ + vnet_classify_table_t * t; + void * oldheap; + + nbuckets = 1 << (max_log2 (nbuckets)); + + pool_get_aligned (cm->tables, t, CLIB_CACHE_LINE_BYTES); + memset(t, 0, sizeof (*t)); + + vec_validate_aligned (t->mask, match_n_vectors - 1, sizeof(u32x4)); + memcpy (t->mask, mask, match_n_vectors * sizeof (u32x4)); + + t->next_table_index = ~0; + t->nbuckets = nbuckets; + t->log2_nbuckets = max_log2 (nbuckets); + t->match_n_vectors = match_n_vectors; + t->skip_n_vectors = skip_n_vectors; + t->entries_per_page = 2; + + t->mheap = mheap_alloc (0 /* use VM */, memory_size); + + vec_validate_aligned (t->buckets, nbuckets - 1, CLIB_CACHE_LINE_BYTES); + oldheap = clib_mem_set_heap (t->mheap); + + t->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + + clib_mem_set_heap (oldheap); + return (t); +} + +void vnet_classify_delete_table_index (vnet_classify_main_t *cm, + u32 table_index) +{ + vnet_classify_table_t * t; + + /* Tolerate multiple frees, up to a point */ + if (pool_is_free_index (cm->tables, table_index)) + return; + + t = pool_elt_at_index (cm->tables, table_index); + if (t->next_table_index != ~0) + vnet_classify_delete_table_index (cm, t->next_table_index); + + vec_free (t->mask); + vec_free (t->buckets); + mheap_free (t->mheap); + + pool_put (cm->tables, t); +} + +static vnet_classify_entry_t * +vnet_classify_entry_alloc (vnet_classify_table_t * t, u32 log2_pages) +{ + vnet_classify_entry_t * rv = 0; +#define _(size) \ + vnet_classify_entry_##size##_t * rv##size = 0; + foreach_size_in_u32x4; +#undef _ + + void * oldheap; + + ASSERT (t->writer_lock[0]); + if (log2_pages >= vec_len (t->freelists) || t->freelists [log2_pages] == 0) + { + oldheap = 
clib_mem_set_heap (t->mheap); + + vec_validate (t->freelists, log2_pages); + + switch(t->match_n_vectors) + { + /* Euchre the vector allocator into allocating the right sizes */ +#define _(size) \ + case size: \ + vec_validate_aligned \ + (rv##size, ((1<<log2_pages)*t->entries_per_page) - 1, \ + CLIB_CACHE_LINE_BYTES); \ + rv = (vnet_classify_entry_t *) rv##size; \ + break; + foreach_size_in_u32x4; +#undef _ + + default: + abort(); + } + + clib_mem_set_heap (oldheap); + goto initialize; + } + rv = t->freelists[log2_pages]; + t->freelists[log2_pages] = rv->next_free; + +initialize: + ASSERT(rv); + ASSERT (vec_len(rv) == (1<<log2_pages)*t->entries_per_page); + + switch (t->match_n_vectors) + { +#define _(size) \ + case size: \ + if(vec_len(rv)) \ + memset (rv, 0xff, sizeof (*rv##size) * vec_len(rv)); \ + break; + foreach_size_in_u32x4; +#undef _ + + default: + abort(); + } + + return rv; +} + +static void +vnet_classify_entry_free (vnet_classify_table_t * t, + vnet_classify_entry_t * v) +{ + u32 free_list_index; + + ASSERT (t->writer_lock[0]); + + free_list_index = min_log2(vec_len(v)/t->entries_per_page); + + ASSERT(vec_len (t->freelists) > free_list_index); + + v->next_free = t->freelists[free_list_index]; + t->freelists[free_list_index] = v; +} + +static inline void make_working_copy +(vnet_classify_table_t * t, vnet_classify_bucket_t * b) +{ + vnet_classify_entry_t * v; + vnet_classify_bucket_t working_bucket __attribute__((aligned (8))); + void * oldheap; + vnet_classify_entry_t * working_copy; +#define _(size) \ + vnet_classify_entry_##size##_t * working_copy##size = 0; + foreach_size_in_u32x4; +#undef _ + u32 cpu_number = os_get_cpu_number(); + + if (cpu_number >= vec_len (t->working_copies)) + { + oldheap = clib_mem_set_heap (t->mheap); + vec_validate (t->working_copies, cpu_number); + clib_mem_set_heap (oldheap); + } + + /* + * working_copies are per-cpu so that near-simultaneous + * updates from multiple threads will not result in sporadic, spurious + * lookup failures. 
+ */ + working_copy = t->working_copies[cpu_number]; + + t->saved_bucket.as_u64 = b->as_u64; + oldheap = clib_mem_set_heap (t->mheap); + + if ((1<<b->log2_pages)*t->entries_per_page > vec_len (working_copy)) + { + switch(t->match_n_vectors) + { + /* Euchre the vector allocator into allocating the right sizes */ +#define _(size) \ + case size: \ + working_copy##size = (void *) working_copy; \ + vec_validate_aligned \ + (working_copy##size, \ + ((1<<b->log2_pages)*t->entries_per_page) - 1, \ + CLIB_CACHE_LINE_BYTES); \ + working_copy = (void *) working_copy##size; \ + break; + foreach_size_in_u32x4; +#undef _ + + default: + abort(); + } + t->working_copies[cpu_number] = working_copy; + } + + _vec_len(working_copy) = (1<<b->log2_pages)*t->entries_per_page; + clib_mem_set_heap (oldheap); + + v = vnet_classify_get_entry (t, b->offset); + + switch(t->match_n_vectors) + { +#define _(size) \ + case size: \ + memcpy (working_copy, v, \ + sizeof (vnet_classify_entry_##size##_t) \ + * (1<<b->log2_pages) \ + * (t->entries_per_page)); \ + break; + foreach_size_in_u32x4 ; +#undef _ + + default: + abort(); + } + + working_bucket.as_u64 = b->as_u64; + working_bucket.offset = vnet_classify_get_offset (t, working_copy); + CLIB_MEMORY_BARRIER(); + b->as_u64 = working_bucket.as_u64; + t->working_copies[cpu_number] = working_copy; +} + +static vnet_classify_entry_t * +split_and_rehash (vnet_classify_table_t * t, + vnet_classify_entry_t * old_values, + u32 new_log2_pages) +{ + vnet_classify_entry_t * new_values, * v, * new_v; + int i, j, k; + + new_values = vnet_classify_entry_alloc (t, new_log2_pages); + + for (i = 0; i < (vec_len (old_values)/t->entries_per_page); i++) + { + u64 new_hash; + + for (j = 0; j < t->entries_per_page; j++) + { + v = vnet_classify_entry_at_index + (t, old_values, i * t->entries_per_page + j); + + if (vnet_classify_entry_is_busy (v)) + { + /* Hack so we can use the packet hash routine */ + u8 * key_minus_skip; + key_minus_skip = (u8 *) v->key; + key_minus_skip -= t->skip_n_vectors * sizeof (u32x4); + + new_hash = vnet_classify_hash_packet (t, key_minus_skip); + new_hash >>= t->log2_nbuckets; + new_hash &= (1<<new_log2_pages) - 1; + + for (k = 0; k < t->entries_per_page; k++) + { + new_v = vnet_classify_entry_at_index (t, new_values, + new_hash + k); + + if (vnet_classify_entry_is_free (new_v)) + { + memcpy (new_v, v, sizeof (vnet_classify_entry_t) + + (t->match_n_vectors * sizeof (u32x4))); + new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + goto doublebreak; + } + } + /* Crap. Tell caller to try again */ + vnet_classify_entry_free (t, new_values); + return 0; + } + doublebreak: + ; + } + } + return new_values; +} + +int vnet_classify_add_del (vnet_classify_table_t * t, + vnet_classify_entry_t * add_v, + int is_add) +{ + u32 bucket_index; + vnet_classify_bucket_t * b, tmp_b; + vnet_classify_entry_t * v, * new_v, * save_new_v, * working_copy, * save_v; + u32 value_index; + int rv = 0; + int i; + u64 hash, new_hash; + u32 new_log2_pages; + u32 cpu_number = os_get_cpu_number(); + u8 * key_minus_skip; + + ASSERT ((add_v->flags & VNET_CLASSIFY_ENTRY_FREE) == 0); + + key_minus_skip = (u8 *) add_v->key; + key_minus_skip -= t->skip_n_vectors * sizeof (u32x4); + + hash = vnet_classify_hash_packet (t, key_minus_skip); + + bucket_index = hash & (t->nbuckets-1); + b = &t->buckets[bucket_index]; + + hash >>= t->log2_nbuckets; + + while (__sync_lock_test_and_set (t->writer_lock, 1)) + ; + + /* First elt in the bucket? 
*/ + if (b->offset == 0) + { + if (is_add == 0) + { + rv = -1; + goto unlock; + } + + v = vnet_classify_entry_alloc (t, 0 /* new_log2_pages */); + memcpy (v, add_v, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof (u32x4)); + v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + + tmp_b.as_u64 = 0; + tmp_b.offset = vnet_classify_get_offset (t, v); + + b->as_u64 = tmp_b.as_u64; + t->active_elements ++; + + goto unlock; + } + + make_working_copy (t, b); + + save_v = vnet_classify_get_entry (t, t->saved_bucket.offset); + value_index = hash & ((1<<t->saved_bucket.log2_pages)-1); + + if (is_add) + { + /* + * For obvious (in hindsight) reasons, see if we're supposed to + * replace an existing key, then look for an empty slot. + */ + + for (i = 0; i < t->entries_per_page; i++) + { + v = vnet_classify_entry_at_index (t, save_v, value_index + i); + + if (!memcmp (v->key, add_v->key, t->match_n_vectors * sizeof (u32x4))) + { + memcpy (v, add_v, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof(u32x4)); + v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + + CLIB_MEMORY_BARRIER(); + /* Restore the previous (k,v) pairs */ + b->as_u64 = t->saved_bucket.as_u64; + goto unlock; + } + } + for (i = 0; i < t->entries_per_page; i++) + { + v = vnet_classify_entry_at_index (t, save_v, value_index + i); + + if (vnet_classify_entry_is_free (v)) + { + memcpy (v, add_v, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof(u32x4)); + v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + CLIB_MEMORY_BARRIER(); + b->as_u64 = t->saved_bucket.as_u64; + t->active_elements ++; + goto unlock; + } + } + /* no room at the inn... split case... */ + } + else + { + for (i = 0; i < t->entries_per_page; i++) + { + v = vnet_classify_entry_at_index (t, save_v, value_index + i); + + if (!memcmp (v->key, add_v->key, t->match_n_vectors * sizeof (u32x4))) + { + memset (v, 0xff, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof(u32x4)); + v->flags |= VNET_CLASSIFY_ENTRY_FREE; + CLIB_MEMORY_BARRIER(); + b->as_u64 = t->saved_bucket.as_u64; + t->active_elements --; + goto unlock; + } + } + rv = -3; + b->as_u64 = t->saved_bucket.as_u64; + goto unlock; + } + + new_log2_pages = t->saved_bucket.log2_pages + 1; + + expand_again: + working_copy = t->working_copies[cpu_number]; + new_v = split_and_rehash (t, working_copy, new_log2_pages); + + if (new_v == 0) + { + new_log2_pages++; + goto expand_again; + } + + /* Try to add the new entry */ + save_new_v = new_v; + + key_minus_skip = (u8 *) add_v->key; + key_minus_skip -= t->skip_n_vectors * sizeof (u32x4); + + new_hash = vnet_classify_hash_packet_inline (t, key_minus_skip); + new_hash >>= t->log2_nbuckets; + new_hash &= (1<<min_log2((vec_len(new_v)/t->entries_per_page))) - 1; + + for (i = 0; i < t->entries_per_page; i++) + { + new_v = vnet_classify_entry_at_index (t, save_new_v, new_hash + i); + + if (vnet_classify_entry_is_free (new_v)) + { + memcpy (new_v, add_v, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof(u32x4)); + new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + goto expand_ok; + } + } + /* Crap. 
Try again */ + new_log2_pages++; + vnet_classify_entry_free (t, save_new_v); + goto expand_again; + + expand_ok: + tmp_b.log2_pages = min_log2 (vec_len (save_new_v)/t->entries_per_page); + tmp_b.offset = vnet_classify_get_offset (t, save_new_v); + CLIB_MEMORY_BARRIER(); + b->as_u64 = tmp_b.as_u64; + t->active_elements ++; + v = vnet_classify_get_entry (t, t->saved_bucket.offset); + vnet_classify_entry_free (t, v); + + unlock: + CLIB_MEMORY_BARRIER(); + t->writer_lock[0] = 0; + + return rv; +} + +typedef CLIB_PACKED(struct { + ethernet_header_t eh; + ip4_header_t ip; +}) classify_data_or_mask_t; + +u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h) +{ + return vnet_classify_hash_packet_inline (t, h); +} + +vnet_classify_entry_t * +vnet_classify_find_entry (vnet_classify_table_t * t, + u8 * h, u64 hash, f64 now) +{ + return vnet_classify_find_entry_inline (t, h, hash, now); +} + +static u8 * format_classify_entry (u8 * s, va_list * args) + { + vnet_classify_table_t * t = va_arg (*args, vnet_classify_table_t *); + vnet_classify_entry_t * e = va_arg (*args, vnet_classify_entry_t *); + + s = format + (s, "[%u]: next_index %d advance %d opaque %d\n", + vnet_classify_get_offset (t, e), e->next_index, e->advance, + e->opaque_index); + + + s = format (s, " k: %U\n", format_hex_bytes, e->key, + t->match_n_vectors * sizeof(u32x4)); + + if (vnet_classify_entry_is_busy (e)) + s = format (s, " hits %lld, last_heard %.2f\n", + e->hits, e->last_heard); + else + s = format (s, " entry is free\n"); + return s; + } + +u8 * format_classify_table (u8 * s, va_list * args) +{ + vnet_classify_table_t * t = va_arg (*args, vnet_classify_table_t *); + int verbose = va_arg (*args, int); + vnet_classify_bucket_t * b; + vnet_classify_entry_t * v, * save_v; + int i, j, k; + u64 active_elements = 0; + + for (i = 0; i < t->nbuckets; i++) + { + b = &t->buckets [i]; + if (b->offset == 0) + { + if (verbose > 1) + s = format (s, "[%d]: empty\n", i); + continue; + } + + if (verbose) + { + s = format (s, "[%d]: heap offset %d, len %d\n", i, + b->offset, (1<<b->log2_pages)); + } + + save_v = vnet_classify_get_entry (t, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < t->entries_per_page; k++) + { + + v = vnet_classify_entry_at_index (t, save_v, + j*t->entries_per_page + k); + + if (vnet_classify_entry_is_free (v)) + { + if (verbose > 1) + s = format (s, " %d: empty\n", + j * t->entries_per_page + k); + continue; + } + if (verbose) + { + s = format (s, " %d: %U\n", + j * t->entries_per_page + k, + format_classify_entry, t, v); + } + active_elements++; + } + } + } + + s = format (s, " %lld active elements\n", active_elements); + s = format (s, " %d free lists\n", vec_len (t->freelists)); + return s; +} + +int vnet_classify_add_del_table (vnet_classify_main_t * cm, + u8 * mask, + u32 nbuckets, + u32 memory_size, + u32 skip, + u32 match, + u32 next_table_index, + u32 miss_next_index, + u32 * table_index, + int is_add) +{ + vnet_classify_table_t * t; + + if (is_add) + { + *table_index = ~0; + if (memory_size == 0) + return VNET_API_ERROR_INVALID_MEMORY_SIZE; + + if (nbuckets == 0) + return VNET_API_ERROR_INVALID_VALUE; + + t = vnet_classify_new_table (cm, mask, nbuckets, memory_size, + skip, match); + t->next_table_index = next_table_index; + t->miss_next_index = miss_next_index; + *table_index = t - cm->tables; + return 0; + } + + vnet_classify_delete_table_index (cm, *table_index); + return 0; +} + +#define foreach_ip4_proto_field \ +_(src_address) \ +_(dst_address) \ +_(tos) \ +_(length) 
\ +_(fragment_id) \ +_(ttl) \ +_(protocol) \ +_(checksum) + +uword unformat_ip4_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u8 * mask = 0; + u8 found_something = 0; + ip4_header_t * ip; + +#define _(a) u8 a=0; + foreach_ip4_proto_field; +#undef _ + u8 version = 0; + u8 hdr_length = 0; + + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "version")) + version = 1; + else if (unformat (input, "hdr_length")) + hdr_length = 1; + else if (unformat (input, "src")) + src_address = 1; + else if (unformat (input, "dst")) + dst_address = 1; + else if (unformat (input, "proto")) + protocol = 1; + +#define _(a) else if (unformat (input, #a)) a=1; + foreach_ip4_proto_field +#undef _ + else + break; + } + +#define _(a) found_something += a; + foreach_ip4_proto_field; +#undef _ + + if (found_something == 0) + return 0; + + vec_validate (mask, sizeof (*ip) - 1); + + ip = (ip4_header_t *) mask; + +#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a)); + foreach_ip4_proto_field; +#undef _ + + ip->ip_version_and_header_length = 0; + + if (version) + ip->ip_version_and_header_length |= 0xF0; + + if (hdr_length) + ip->ip_version_and_header_length |= 0x0F; + + *maskp = mask; + return 1; +} + +#define foreach_ip6_proto_field \ +_(src_address) \ +_(dst_address) \ +_(payload_length) \ +_(hop_limit) \ +_(protocol) + +uword unformat_ip6_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u8 * mask = 0; + u8 found_something = 0; + ip6_header_t * ip; + u32 ip_version_traffic_class_and_flow_label; + +#define _(a) u8 a=0; + foreach_ip6_proto_field; +#undef _ + u8 version = 0; + u8 traffic_class = 0; + u8 flow_label = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "version")) + version = 1; + else if (unformat (input, "traffic-class")) + traffic_class = 1; + else if (unformat (input, "flow-label")) + flow_label = 1; + else if (unformat (input, "src")) + src_address = 1; + else if (unformat (input, "dst")) + dst_address = 1; + else if (unformat (input, "proto")) + protocol = 1; + +#define _(a) else if (unformat (input, #a)) a=1; + foreach_ip6_proto_field +#undef _ + else + break; + } + +#define _(a) found_something += a; + foreach_ip6_proto_field; +#undef _ + + if (found_something == 0) + return 0; + + vec_validate (mask, sizeof (*ip) - 1); + + ip = (ip6_header_t *) mask; + +#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a)); + foreach_ip6_proto_field; +#undef _ + + ip_version_traffic_class_and_flow_label = 0; + + if (version) + ip_version_traffic_class_and_flow_label |= 0xF0000000; + + if (traffic_class) + ip_version_traffic_class_and_flow_label |= 0x0FF00000; + + if (flow_label) + ip_version_traffic_class_and_flow_label |= 0x000FFFFF; + + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label); + + *maskp = mask; + return 1; +} + +uword unformat_l3_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "ip4 %U", unformat_ip4_mask, maskp)) + return 1; + else if (unformat (input, "ip6 %U", unformat_ip6_mask, maskp)) + return 1; + else + break; + } + return 0; +} + +uword unformat_l2_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u8 * mask = 0; + u8 src = 0; + u8 dst = 0; + u8 proto = 0; + u8 tag1 = 0; + u8 tag2 = 
0; + u8 ignore_tag1 = 0; + u8 ignore_tag2 = 0; + u8 cos1 = 0; + u8 cos2 = 0; + u8 dot1q = 0; + u8 dot1ad = 0; + int len = 14; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "src")) + src = 1; + else if (unformat (input, "dst")) + dst = 1; + else if (unformat (input, "proto")) + proto = 1; + else if (unformat (input, "tag1")) + tag1 = 1; + else if (unformat (input, "tag2")) + tag2 = 1; + else if (unformat (input, "ignore-tag1")) + ignore_tag1 = 1; + else if (unformat (input, "ignore-tag2")) + ignore_tag2 = 1; + else if (unformat (input, "cos1")) + cos1 = 1; + else if (unformat (input, "cos2")) + cos2 = 1; + else if (unformat (input, "dot1q")) + dot1q = 1; + else if (unformat (input, "dot1ad")) + dot1ad = 1; + else + break; + } + if ((src + dst + proto + tag1 + tag2 + dot1q + dot1ad + + ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0) + return 0; + + if (tag1 || ignore_tag1 || cos1 || dot1q) + len = 18; + if (tag2 || ignore_tag2 || cos2 || dot1ad) + len = 22; + + vec_validate (mask, len-1); + + if (dst) + memset (mask, 0xff, 6); + + if (src) + memset (mask + 6, 0xff, 6); + + if (tag2 || dot1ad) + { + /* inner vlan tag */ + if (tag2) + { + mask[19] = 0xff; + mask[18] = 0x0f; + } + if (cos2) + mask[18] |= 0xe0; + if (proto) + mask[21] = mask [20] = 0xff; + if (tag1) + { + mask [15] = 0xff; + mask [14] = 0x0f; + } + if (cos1) + mask[14] |= 0xe0; + *maskp = mask; + return 1; + } + if (tag1 | dot1q) + { + if (tag1) + { + mask [15] = 0xff; + mask [14] = 0x0f; + } + if (cos1) + mask[14] |= 0xe0; + if (proto) + mask[16] = mask [17] = 0xff; + *maskp = mask; + return 1; + } + if (cos2) + mask[18] |= 0xe0; + if (cos1) + mask[14] |= 0xe0; + if (proto) + mask[12] = mask [13] = 0xff; + + *maskp = mask; + return 1; +} + +uword unformat_classify_mask (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * CLIB_UNUSED(cm) + = va_arg (*args, vnet_classify_main_t *); + u8 ** maskp = va_arg (*args, u8 **); + u32 * skipp = va_arg (*args, u32 *); + u32 * matchp = va_arg (*args, u32 *); + u32 match; + u8 * mask = 0; + u8 * l2 = 0; + u8 * l3 = 0; + int i; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "hex %U", unformat_hex_string, &mask)) + ; + else if (unformat (input, "l2 %U", unformat_l2_mask, &l2)) + ; + else if (unformat (input, "l3 %U", unformat_l3_mask, &l3)) + ; + else + break; + } + + if (mask || l2 || l3) + { + if (l2 || l3) + { + /* "With a free Ethernet header in every package" */ + if (l2 == 0) + vec_validate (l2, 13); + mask = l2; + vec_append (mask, l3); + vec_free (l3); + } + + /* Scan forward looking for the first significant mask octet */ + for (i = 0; i < vec_len (mask); i++) + if (mask[i]) + break; + + /* compute (skip, match) params */ + *skipp = i / sizeof(u32x4); + vec_delete (mask, *skipp * sizeof(u32x4), 0); + + /* Pad mask to an even multiple of the vector size */ + while (vec_len (mask) % sizeof (u32x4)) + vec_add1 (mask, 0); + + match = vec_len (mask) / sizeof (u32x4); + + for (i = match*sizeof(u32x4); i > 0; i-= sizeof(u32x4)) + { + u64 *tmp = (u64 *)(mask + (i-sizeof(u32x4))); + if (*tmp || *(tmp+1)) + break; + match--; + } + if (match == 0) + clib_warning ("BUG: match 0"); + + _vec_len (mask) = match * sizeof(u32x4); + + *matchp = match; + *maskp = mask; + + return 1; + } + + return 0; +} + +#define foreach_l2_next \ +_(drop, DROP) \ +_(ethernet, ETHERNET_INPUT) \ +_(ip4, IP4_INPUT) \ +_(ip6, IP6_INPUT) \ +_(li, LI) + +uword unformat_l2_next_index (unformat_input_t * input, va_list * 
args) +{ + u32 * miss_next_indexp = va_arg (*args, u32 *); + u32 next_index = 0; + u32 tmp; + +#define _(n,N) \ + if (unformat (input, #n)) { next_index = L2_CLASSIFY_NEXT_##N; goto out;} + foreach_l2_next; +#undef _ + + if (unformat (input, "%d", &tmp)) + { + next_index = tmp; + goto out; + } + + return 0; + + out: + *miss_next_indexp = next_index; + return 1; +} + +#define foreach_ip_next \ +_(miss, MISS) \ +_(drop, DROP) \ +_(local, LOCAL) \ +_(rewrite, REWRITE) + +uword unformat_ip_next_index (unformat_input_t * input, va_list * args) +{ + u32 * miss_next_indexp = va_arg (*args, u32 *); + u32 next_index = 0; + u32 tmp; + +#define _(n,N) \ + if (unformat (input, #n)) { next_index = IP_LOOKUP_NEXT_##N; goto out;} + foreach_ip_next; +#undef _ + + if (unformat (input, "%d", &tmp)) + { + next_index = tmp; + goto out; + } + + return 0; + + out: + *miss_next_indexp = next_index; + return 1; +} + +#define foreach_acl_next \ +_(deny, DENY) + +uword unformat_acl_next_index (unformat_input_t * input, va_list * args) +{ + u32 * miss_next_indexp = va_arg (*args, u32 *); + u32 next_index = 0; + u32 tmp; + +#define _(n,N) \ + if (unformat (input, #n)) { next_index = ACL_NEXT_INDEX_##N; goto out;} + foreach_acl_next; +#undef _ + + if (unformat (input, "permit")) + { + next_index = ~0; + goto out; + } + else if (unformat (input, "%d", &tmp)) + { + next_index = tmp; + goto out; + } + + return 0; + + out: + *miss_next_indexp = next_index; + return 1; +} + +static clib_error_t * +classify_table_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 nbuckets = 2; + u32 skip = ~0; + u32 match = ~0; + int is_add = 1; + u32 table_index = ~0; + u32 next_table_index = ~0; + u32 miss_next_index = ~0; + u32 memory_size = 2<<20; + u32 tmp; + + u8 * mask = 0; + vnet_classify_main_t * cm = &vnet_classify_main; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "buckets %d", &nbuckets)) + ; + else if (unformat (input, "skip %d", &skip)) + ; + else if (unformat (input, "match %d", &match)) + ; + else if (unformat (input, "table %d", &table_index)) + ; + else if (unformat (input, "mask %U", unformat_classify_mask, + cm, &mask, &skip, &match)) + ; + else if (unformat (input, "memory-size %uM", &tmp)) + memory_size = tmp<<20; + else if (unformat (input, "memory-size %uG", &tmp)) + memory_size = tmp<<30; + else if (unformat (input, "next-table %d", &next_table_index)) + ; + else if (unformat (input, "miss-next %U", unformat_ip_next_index, + &miss_next_index)) + ; + else if (unformat (input, "l2-miss-next %U", unformat_l2_next_index, + &miss_next_index)) + ; + else if (unformat (input, "acl-miss-next %U", unformat_acl_next_index, + &miss_next_index)) + ; + + else + break; + } + + if (is_add && mask == 0) + return clib_error_return (0, "Mask required"); + + if (is_add && skip == ~0) + return clib_error_return (0, "skip count required"); + + if (is_add && match == ~0) + return clib_error_return (0, "match count required"); + + if (!is_add && table_index == ~0) + return clib_error_return (0, "table index required for delete"); + + rv = vnet_classify_add_del_table (cm, mask, nbuckets, memory_size, + skip, match, next_table_index, miss_next_index, + &table_index, is_add); + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "vnet_classify_add_del_table returned %d", + rv); + } + return 0; +} + +VLIB_CLI_COMMAND (classify_table, static) = { + .path = "classify 
table", + .short_help = + "classify table [miss-next|l2-miss_next|acl-miss-next <next_index>]" + "\n mask <mask-value> buckets <nn> [skip <n>] [match <n>] [del]", + .function = classify_table_command_fn, +}; + +static u8 * format_vnet_classify_table (u8 * s, va_list * args) +{ + vnet_classify_main_t * cm = va_arg (*args, vnet_classify_main_t *); + int verbose = va_arg (*args, int); + u32 index = va_arg (*args, u32); + vnet_classify_table_t * t; + + if (index == ~0) + { + s = format (s, "%10s%10s%10s%10s", "TableIdx", "Sessions", "NextTbl", + "NextNode", verbose ? "Details" : ""); + return s; + } + + t = pool_elt_at_index (cm->tables, index); + s = format (s, "%10u%10d%10d%10d", index, t->active_elements, + t->next_table_index, t->miss_next_index); + + s = format (s, "\n Heap: %U", format_mheap, t->mheap, 0 /*verbose*/); + + s = format (s, "\n nbuckets %d, skip %d match %d", + t->nbuckets, t->skip_n_vectors, t->match_n_vectors); + s = format (s, "\n mask %U", format_hex_bytes, t->mask, + t->match_n_vectors * sizeof (u32x4)); + + if (verbose == 0) + return s; + + s = format (s, "\n%U", format_classify_table, t, verbose); + + return s; +} + +static clib_error_t * +show_classify_tables_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + vnet_classify_table_t * t; + u32 match_index = ~0; + u32 * indices = 0; + int verbose = 0; + int i; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "index %d", &match_index)) + ; + else if (unformat (input, "verbose %d", &verbose)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + pool_foreach (t, cm->tables, + ({ + if (match_index == ~0 || (match_index == t - cm->tables)) + vec_add1 (indices, t - cm->tables); + })); + + if (vec_len(indices)) + { + vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, verbose, + ~0 /* hdr */); + for (i = 0; i < vec_len (indices); i++) + vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, + verbose, indices[i]); + } + else + vlib_cli_output (vm, "No classifier tables configured"); + + vec_free (indices); + + return 0; +} + +VLIB_CLI_COMMAND (show_classify_table_command, static) = { + .path = "show classify tables", + .short_help = "show classify tables [index <nn>]", + .function = show_classify_tables_command_fn, +}; + +uword unformat_ip4_match (unformat_input_t * input, va_list * args) +{ + u8 ** matchp = va_arg (*args, u8 **); + u8 * match = 0; + ip4_header_t * ip; + int version = 0; + u32 version_val; + int hdr_length = 0; + u32 hdr_length_val; + int src = 0, dst = 0; + ip4_address_t src_val, dst_val; + int proto = 0; + u32 proto_val; + int tos = 0; + u32 tos_val; + int length = 0; + u32 length_val; + int fragment_id = 0; + u32 fragment_id_val; + int ttl = 0; + int ttl_val; + int checksum = 0; + u32 checksum_val; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "version %d", &version_val)) + version = 1; + else if (unformat (input, "hdr_length %d", &hdr_length_val)) + hdr_length = 1; + else if (unformat (input, "src %U", unformat_ip4_address, &src_val)) + src = 1; + else if (unformat (input, "dst %U", unformat_ip4_address, &dst_val)) + dst = 1; + else if (unformat (input, "proto %d", &proto_val)) + proto = 1; + else if (unformat (input, "tos %d", &tos_val)) + tos = 1; + else if (unformat (input, "length %d", &length_val)) + length = 1; + else if (unformat (input, "fragment_id %d", &fragment_id_val)) + 
fragment_id = 1; + else if (unformat (input, "ttl %d", &ttl_val)) + ttl = 1; + else if (unformat (input, "checksum %d", &checksum_val)) + checksum = 1; + else + break; + } + + if (version + hdr_length + src + dst + proto + tos + length + fragment_id + + ttl + checksum == 0) + return 0; + + /* + * Aligned because we use the real comparison functions + */ + vec_validate_aligned (match, sizeof (*ip) - 1, sizeof(u32x4)); + + ip = (ip4_header_t *) match; + + /* These are realistically matched in practice */ + if (src) + ip->src_address.as_u32 = src_val.as_u32; + + if (dst) + ip->dst_address.as_u32 = dst_val.as_u32; + + if (proto) + ip->protocol = proto_val; + + + /* These are not, but they're included for completeness */ + if (version) + ip->ip_version_and_header_length |= (version_val & 0xF)<<4; + + if (hdr_length) + ip->ip_version_and_header_length |= (hdr_length_val & 0xF); + + if (tos) + ip->tos = tos_val; + + if (length) + ip->length = length_val; + + if (ttl) + ip->ttl = ttl_val; + + if (checksum) + ip->checksum = checksum_val; + + *matchp = match; + return 1; +} + +uword unformat_ip6_match (unformat_input_t * input, va_list * args) +{ + u8 ** matchp = va_arg (*args, u8 **); + u8 * match = 0; + ip6_header_t * ip; + int version = 0; + u32 version_val; + u8 traffic_class = 0; + u32 traffic_class_val; + u8 flow_label = 0; + u8 flow_label_val; + int src = 0, dst = 0; + ip6_address_t src_val, dst_val; + int proto = 0; + u32 proto_val; + int payload_length = 0; + u32 payload_length_val; + int hop_limit = 0; + int hop_limit_val; + u32 ip_version_traffic_class_and_flow_label; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "version %d", &version_val)) + version = 1; + else if (unformat (input, "traffic_class %d", &traffic_class_val)) + traffic_class = 1; + else if (unformat (input, "flow_label %d", &flow_label_val)) + flow_label = 1; + else if (unformat (input, "src %U", unformat_ip6_address, &src_val)) + src = 1; + else if (unformat (input, "dst %U", unformat_ip6_address, &dst_val)) + dst = 1; + else if (unformat (input, "proto %d", &proto_val)) + proto = 1; + else if (unformat (input, "payload_length %d", &payload_length_val)) + payload_length = 1; + else if (unformat (input, "hop_limit %d", &hop_limit_val)) + hop_limit = 1; + else + break; + } + + if (version + traffic_class + flow_label + src + dst + proto + + payload_length + hop_limit == 0) + return 0; + + /* + * Aligned because we use the real comparison functions + */ + vec_validate_aligned (match, sizeof (*ip) - 1, sizeof(u32x4)); + + ip = (ip6_header_t *) match; + + if (src) + memcpy (&ip->src_address, &src_val, sizeof (ip->src_address)); + + if (dst) + memcpy (&ip->dst_address, &dst_val, sizeof (ip->dst_address)); + + if (proto) + ip->protocol = proto_val; + + ip_version_traffic_class_and_flow_label = 0; + + if (version) + ip_version_traffic_class_and_flow_label |= (version_val & 0xF) << 28; + + if (traffic_class) + ip_version_traffic_class_and_flow_label |= (traffic_class_val & 0xFF) << 20; + + if (flow_label) + ip_version_traffic_class_and_flow_label |= (flow_label_val & 0xFFFFF); + + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label); + + if (payload_length) + ip->payload_length = clib_host_to_net_u16 (payload_length_val); + + if (hop_limit) + ip->hop_limit = hop_limit_val; + + *matchp = match; + return 1; +} + +uword unformat_l3_match (unformat_input_t * input, va_list * args) +{ + u8 ** matchp = va_arg (*args, u8 **); + + while 
(unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "ip4 %U", unformat_ip4_match, matchp)) + return 1; + else if (unformat (input, "ip6 %U", unformat_ip6_match, matchp)) + return 1; + /* $$$$ add mpls */ + else + break; + } + return 0; +} + +uword unformat_vlan_tag (unformat_input_t * input, va_list * args) +{ + u8 * tagp = va_arg (*args, u8 *); + u32 tag; + + if (unformat(input, "%d", &tag)) + { + tagp[0] = (tag>>8) & 0x0F; + tagp[1] = tag & 0xFF; + return 1; + } + + return 0; +} + +uword unformat_l2_match (unformat_input_t * input, va_list * args) +{ + u8 ** matchp = va_arg (*args, u8 **); + u8 * match = 0; + u8 src = 0; + u8 src_val[6]; + u8 dst = 0; + u8 dst_val[6]; + u8 proto = 0; + u16 proto_val; + u8 tag1 = 0; + u8 tag1_val [2]; + u8 tag2 = 0; + u8 tag2_val [2]; + int len = 14; + u8 ignore_tag1 = 0; + u8 ignore_tag2 = 0; + u8 cos1 = 0; + u8 cos2 = 0; + u32 cos1_val = 0; + u32 cos2_val = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "src %U", unformat_ethernet_address, &src_val)) + src = 1; + else if (unformat (input, "dst %U", unformat_ethernet_address, &dst_val)) + dst = 1; + else if (unformat (input, "proto %U", + unformat_ethernet_type_host_byte_order, &proto_val)) + proto = 1; + else if (unformat (input, "tag1 %U", unformat_vlan_tag, tag1_val)) + tag1 = 1; + else if (unformat (input, "tag2 %U", unformat_vlan_tag, tag2_val)) + tag2 = 1; + else if (unformat (input, "ignore-tag1")) + ignore_tag1 = 1; + else if (unformat (input, "ignore-tag2")) + ignore_tag2 = 1; + else if (unformat (input, "cos1 %d", &cos1_val)) + cos1 = 1; + else if (unformat (input, "cos2 %d", &cos2_val)) + cos2 = 1; + else + break; + } + if ((src + dst + proto + tag1 + tag2 + + ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0) + return 0; + + if (tag1 || ignore_tag1 || cos1) + len = 18; + if (tag2 || ignore_tag2 || cos2) + len = 22; + + vec_validate_aligned (match, len-1, sizeof(u32x4)); + + if (dst) + memcpy (match, dst_val, 6); + + if (src) + memcpy (match + 6, src_val, 6); + + if (tag2) + { + /* inner vlan tag */ + match[19] = tag2_val[1]; + match[18] = tag2_val[0]; + if (cos2) + match [18] |= (cos2_val & 0x7) << 5; + if (proto) + { + match[21] = proto_val & 0xff; + match[20] = proto_val >> 8; + } + if (tag1) + { + match [15] = tag1_val[1]; + match [14] = tag1_val[0]; + } + if (cos1) + match [14] |= (cos1_val & 0x7) << 5; + *matchp = match; + return 1; + } + if (tag1) + { + match [15] = tag1_val[1]; + match [14] = tag1_val[0]; + if (proto) + { + match[17] = proto_val & 0xff; + match[16] = proto_val >> 8; + } + if (cos1) + match [14] |= (cos1_val & 0x7) << 5; + + *matchp = match; + return 1; + } + if (cos2) + match [18] |= (cos2_val & 0x7) << 5; + if (cos1) + match [14] |= (cos1_val & 0x7) << 5; + if (proto) + { + match[13] = proto_val & 0xff; + match[12] = proto_val >> 8; + } + + *matchp = match; + return 1; +} + + +uword unformat_classify_match (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * cm = va_arg (*args, vnet_classify_main_t *); + u8 ** matchp = va_arg (*args, u8 **); + u32 table_index = va_arg (*args, u32); + vnet_classify_table_t * t; + + u8 * match = 0; + u8 * l2 = 0; + u8 * l3 = 0; + + if (pool_is_free_index (cm->tables, table_index)) + return 0; + + t = pool_elt_at_index (cm->tables, table_index); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "hex %U", unformat_hex_string, &match)) + ; + else if (unformat (input, "l2 %U", unformat_l2_match, &l2)) + ; + else 
if (unformat (input, "l3 %U", unformat_l3_match, &l3)) + ; + else + break; + } + + if (match || l2 || l3) + { + if (l2 || l3) + { + /* "Win a free Ethernet header in every packet" */ + if (l2 == 0) + vec_validate_aligned (l2, 13, sizeof(u32x4)); + match = l2; + vec_append_aligned (match, l3, sizeof(u32x4)); + vec_free (l3); + } + + /* Make sure the vector is big enough even if key is all 0's */ + vec_validate_aligned + (match, ((t->match_n_vectors + t->skip_n_vectors) * sizeof(u32x4)) - 1, + sizeof(u32x4)); + + /* Set size, include skipped vectors*/ + _vec_len (match) = (t->match_n_vectors+t->skip_n_vectors) * sizeof(u32x4); + + *matchp = match; + + return 1; + } + + return 0; +} + +int vnet_classify_add_del_session (vnet_classify_main_t * cm, + u32 table_index, + u8 * match, + u32 hit_next_index, + u32 opaque_index, + i32 advance, + int is_add) +{ + vnet_classify_table_t * t; + vnet_classify_entry_5_t _max_e __attribute__((aligned (16))); + vnet_classify_entry_t * e; + int i, rv; + + if (pool_is_free_index (cm->tables, table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE; + + t = pool_elt_at_index (cm->tables, table_index); + + e = (vnet_classify_entry_t *)&_max_e; + e->next_index = hit_next_index; + e->opaque_index = opaque_index; + e->advance = advance; + e->hits = 0; + e->last_heard = 0; + e->flags = 0; + + /* Copy key data, honoring skip_n_vectors */ + memcpy (&e->key, match + t->skip_n_vectors * sizeof (u32x4), + t->match_n_vectors * sizeof (u32x4)); + + /* Clear don't-care bits; likely when dynamically creating sessions */ + for (i = 0; i < t->match_n_vectors; i++) + e->key[i] &= t->mask[i]; + + rv = vnet_classify_add_del (t, e, is_add); + if (rv) + return VNET_API_ERROR_NO_SUCH_ENTRY; + return 0; +} + +static clib_error_t * +classify_session_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + int is_add = 1; + u32 table_index = ~0; + u32 hit_next_index = ~0; + u32 opaque_index = ~0; + u8 * match = 0; + i32 advance = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "hit-next %U", unformat_ip_next_index, + &hit_next_index)) + ; + else if (unformat (input, "l2-hit-next %U", unformat_l2_next_index, + &hit_next_index)) + ; + else if (unformat (input, "acl-hit-next %U", unformat_acl_next_index, + &hit_next_index)) + ; + else if (unformat (input, "opaque-index %d", &opaque_index)) + ; + else if (unformat (input, "match %U", unformat_classify_match, + cm, &match, table_index)) + ; + else if (unformat (input, "advance %d", &advance)) + ; + else if (unformat (input, "table-index %d", &table_index)) + ; + else + break; + } + + if (table_index == ~0) + return clib_error_return (0, "Table index required"); + + if (is_add && match == 0) + return clib_error_return (0, "Match value required"); + + rv = vnet_classify_add_del_session (cm, table_index, match, + hit_next_index, + opaque_index, advance, is_add); + + switch(rv) + { + case 0: + break; + + default: + return clib_error_return (0, "vnet_classify_add_del_session returned %d", + rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (classify_session_command, static) = { + .path = "classify session", + .short_help = + "classify session [hit-next|l2-hit-next|acl-hit-next <next_index>]" + "\n table-index <nn> match [hex] [l2] [l3 ip4]", + .function = classify_session_command_fn, +}; + +#define TEST_CODE 1 + +#if TEST_CODE > 0 +static clib_error_t * 
+test_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 buckets = 2; + u32 sessions = 10; + int i, rv; + vnet_classify_table_t * t = 0; + classify_data_or_mask_t * mask; + classify_data_or_mask_t * data; + u8 *mp = 0, *dp = 0; + vnet_classify_main_t * cm = &vnet_classify_main; + vnet_classify_entry_t * e; + int is_add = 1; + u32 tmp; + u32 table_index = ~0; + ip4_address_t src; + u32 deleted = 0; + u32 memory_size = 64<<20; + + /* Default starting address 1.0.0.10 */ + src.as_u32 = clib_net_to_host_u32 (0x0100000A); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "sessions %d", &sessions)) + ; + else if (unformat (input, "src %U", unformat_ip4_address, &src)) + ; + else if (unformat (input, "buckets %d", &buckets)) + ; + else if (unformat (input, "memory-size %uM", &tmp)) + memory_size = tmp<<20; + else if (unformat (input, "memory-size %uG", &tmp)) + memory_size = tmp<<30; + else if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "table %d", &table_index)) + ; + else + break; + } + + vec_validate_aligned (mp, 3 * sizeof(u32x4), sizeof(u32x4)); + vec_validate_aligned (dp, 3 * sizeof(u32x4), sizeof(u32x4)); + + mask = (classify_data_or_mask_t *) mp; + data = (classify_data_or_mask_t *) dp; + + data->ip.src_address.as_u32 = src.as_u32; + + /* Mask on src address */ + memset (&mask->ip.src_address, 0xff, 4); + + buckets = 1<<max_log2(buckets); + + if (table_index != ~0) + { + if (pool_is_free_index (cm->tables, table_index)) + { + vlib_cli_output (vm, "No such table %d", table_index); + goto out; + } + t = pool_elt_at_index (cm->tables, table_index); + } + + if (is_add) + { + if (t == 0) + { + t = vnet_classify_new_table (cm, (u8 *)mask, buckets, + memory_size, + 0 /* skip */, + 3 /* vectors to match */); + t->miss_next_index = IP_LOOKUP_NEXT_LOCAL; + vlib_cli_output (vm, "Create table %d", t - cm->tables); + } + + vlib_cli_output (vm, "Add %d sessions to %d buckets...", + sessions, buckets); + + for (i = 0; i < sessions; i++) + { + rv = vnet_classify_add_del_session (cm, t - cm->tables, (u8 *) data, + IP_LOOKUP_NEXT_DROP, + i+100 /* opaque_index */, + 0 /* advance */, + 1 /* is_add */); + + if (rv != 0) + clib_warning ("add: returned %d", rv); + + tmp = clib_net_to_host_u32 (data->ip.src_address.as_u32) + 1; + data->ip.src_address.as_u32 = clib_net_to_host_u32 (tmp); + } + goto out; + } + + if (t == 0) + { + vlib_cli_output (vm, "Must specify table index to delete sessions"); + goto out; + } + + vlib_cli_output (vm, "Try to delete %d sessions...", sessions); + + for (i = 0; i < sessions; i++) + { + u8 * key_minus_skip; + u64 hash; + + hash = vnet_classify_hash_packet (t, (u8 *) data); + + e = vnet_classify_find_entry (t, (u8 *) data, hash, 0 /* time_now */); + /* Previous delete, perhaps... 
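   an earlier pass of this loop may already have removed the session, so a miss here is simply skipped rather than treated as an error.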
*/ + if (e == 0) + continue; + ASSERT (e->opaque_index == (i+100)); + + key_minus_skip = (u8 *)e->key; + key_minus_skip -= t->skip_n_vectors * sizeof (u32x4); + + rv = vnet_classify_add_del_session (cm, t - cm->tables, key_minus_skip, + IP_LOOKUP_NEXT_DROP, + i+100 /* opaque_index */, + 0 /* advance */, + 0 /* is_add */); + if (rv != 0) + clib_warning ("del: returned %d", rv); + + tmp = clib_net_to_host_u32 (data->ip.src_address.as_u32) + 1; + data->ip.src_address.as_u32 = clib_net_to_host_u32 (tmp); + deleted++; + } + + vlib_cli_output (vm, "Deleted %d sessions...", deleted); + + out: + vec_free (mp); + vec_free (dp); + + return 0; +} + +VLIB_CLI_COMMAND (test_classify_command, static) = { + .path = "test classify", + .short_help = + "test classify [src <ip>] [sessions <nn>] [buckets <nn>] [table <nn>] [del]", + .function = test_classify_command_fn, +}; +#endif /* TEST_CODE */ diff --git a/vnet/vnet/classify/vnet_classify.h b/vnet/vnet/classify/vnet_classify.h new file mode 100644 index 00000000000..03271ad2e06 --- /dev/null +++ b/vnet/vnet/classify/vnet_classify.h @@ -0,0 +1,414 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_vnet_classify_h__ +#define __included_vnet_classify_h__ + +#include <stdarg.h> + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/api_errno.h> /* for API error numbers */ + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> +#include <vppinfra/xxhash.h> + +vlib_node_registration_t ip4_classify_node; +vlib_node_registration_t ip6_classify_node; + +#define CLASSIFY_TRACE 0 + +struct _vnet_classify_main; +typedef struct _vnet_classify_main vnet_classify_main_t; + +#define foreach_size_in_u32x4 \ +_(1) \ +_(2) \ +_(3) \ +_(4) \ +_(5) + +typedef CLIB_PACKED(struct _vnet_classify_entry { + /* Graph node next index */ + u32 next_index; + + /* put into vnet_buffer(b)->l2_classfy.opaque_index */ + union { + struct { + u32 opaque_index; + /* advance on hit, note it's a signed quantity... 
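   typically applied to the buffer via vlib_buffer_advance() on a hit, so a negative value rewinds toward the start of the packet while a positive value skips deeper into it.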
*/ + i32 advance; + }; + u64 opaque_count; + }; + + /* Really only need 1 bit */ + u32 flags; +#define VNET_CLASSIFY_ENTRY_FREE (1<<0) + + /* Hit counter, last heard time */ + union { + u64 hits; + struct _vnet_classify_entry * next_free; + }; + + f64 last_heard; + + /* Must be aligned to a 16-octet boundary */ + u32x4 key[0]; +}) vnet_classify_entry_t; + +static inline int vnet_classify_entry_is_free (vnet_classify_entry_t * e) +{ + return e->flags & VNET_CLASSIFY_ENTRY_FREE; +} + +static inline int vnet_classify_entry_is_busy (vnet_classify_entry_t * e) +{ + return ((e->flags & VNET_CLASSIFY_ENTRY_FREE) == 0); +} + +/* Need these to con the vector allocator */ +#define _(size) \ +typedef CLIB_PACKED(struct { \ + u32 pad0[4]; \ + u64 pad1[2]; \ + u32x4 key[size]; \ +}) vnet_classify_entry_##size##_t; +foreach_size_in_u32x4; +#undef _ + +typedef struct { + union { + struct { + u32 offset; + u8 pad[3]; + u8 log2_pages; + }; + u64 as_u64; + }; +} vnet_classify_bucket_t; + +typedef struct { + /* Mask to apply after skipping N vectors */ + u32x4 *mask; + /* Buckets and entries */ + vnet_classify_bucket_t * buckets; + vnet_classify_entry_t * entries; + + /* Config parameters */ + u32 match_n_vectors; + u32 skip_n_vectors; + u32 nbuckets; + u32 log2_nbuckets; + int entries_per_page; + u32 active_elements; + /* Index of next table to try */ + u32 next_table_index; + + /* Miss next index, return if next_table_index = 0 */ + u32 miss_next_index; + + /* Per-bucket working copies, one per thread */ + vnet_classify_entry_t ** working_copies; + vnet_classify_bucket_t saved_bucket; + + /* Free entry freelists */ + vnet_classify_entry_t **freelists; + + u8 * name; + + /* Private allocation arena, protected by the writer lock */ + void * mheap; + + /* Writer (only) lock for this table */ + volatile u32 * writer_lock; + +} vnet_classify_table_t; + +struct _vnet_classify_main { + /* Table pool */ + vnet_classify_table_t * tables; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +}; + +vnet_classify_main_t vnet_classify_main; + +u8 * format_classify_table (u8 * s, va_list * args); + +u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h); + +static inline u64 +vnet_classify_hash_packet_inline (vnet_classify_table_t * t, + u8 * h) +{ + u32x4 *data, *mask; + + union { + u32x4 as_u32x4; + u64 as_u64[2]; + } xor_sum __attribute__((aligned(sizeof(u32x4)))); + + ASSERT(t); + + data = (u32x4 *)h; + mask = t->mask; + + ASSERT ((((u64)h) & 0xf) == 0); + + xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0]; + + switch (t->match_n_vectors) + { + case 5: + xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4]; + /* FALLTHROUGH */ + case 4: + xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3]; + /* FALLTHROUGH */ + case 3: + xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2]; + /* FALLTHROUGH */ + case 2: + xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1]; + /* FALLTHROUGH */ + case 1: + break; + + default: + abort(); + } + + return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]); +} + +static inline void +vnet_classify_prefetch_bucket (vnet_classify_table_t * t, u64 hash) +{ + u32 bucket_index; + + ASSERT (is_pow2(t->nbuckets)); + + bucket_index = hash & (t->nbuckets - 1); + + CLIB_PREFETCH(&t->buckets[bucket_index], CLIB_CACHE_LINE_BYTES, LOAD); +} + +static inline vnet_classify_entry_t * +vnet_classify_get_entry (vnet_classify_table_t * t, uword offset) +{ + u8 * hp = t->mheap; + u8 * vp = hp + offset; + + return (void *) vp; +} + 
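+/*
+ * Illustrative sketch, not part of the original patch: classifier entries
+ * live in the table's private mheap and are named by byte offsets from the
+ * heap base, which is what lets a bucket pack the location of its entry
+ * page into the 32-bit 'offset' field of vnet_classify_bucket_t. Assuming
+ * that layout, the round trip below holds for any live entry 'e' of table
+ * 't'; the helper name is hypothetical.
+ */
+static inline void
+vnet_classify_offset_roundtrip_check (vnet_classify_table_t * t,
+                                      vnet_classify_entry_t * e)
+{
+  uword offset = (u8 *) e - (u8 *) t->mheap;
+
+  ASSERT (offset < 0x100000000ULL);
+  ASSERT (vnet_classify_get_entry (t, offset) == e);
+}
+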
+static inline uword vnet_classify_get_offset (vnet_classify_table_t * t, + vnet_classify_entry_t * v) +{ + u8 * hp, * vp; + + hp = (u8 *) t->mheap; + vp = (u8 *) v; + + ASSERT((vp - hp) < 0x100000000ULL); + return vp - hp; +} + +static inline vnet_classify_entry_t * +vnet_classify_entry_at_index (vnet_classify_table_t * t, + vnet_classify_entry_t * e, + u32 index) +{ + u8 * eu8; + + eu8 = (u8 *)e; + + eu8 += index * (sizeof (vnet_classify_entry_t) + + (t->match_n_vectors * sizeof (u32x4))); + + return (vnet_classify_entry_t *) eu8; +} + +static inline void +vnet_classify_prefetch_entry (vnet_classify_table_t * t, + u64 hash) +{ + u32 bucket_index; + u32 value_index; + vnet_classify_bucket_t * b; + vnet_classify_entry_t * e; + + bucket_index = hash & (t->nbuckets - 1); + + b = &t->buckets[bucket_index]; + + if (b->offset == 0) + return; + + hash >>= t->log2_nbuckets; + + e = vnet_classify_get_entry (t, b->offset); + value_index = hash & ((1<<b->log2_pages)-1); + + e = vnet_classify_entry_at_index (t, e, value_index); + + CLIB_PREFETCH(e, CLIB_CACHE_LINE_BYTES, LOAD); +} + +vnet_classify_entry_t * +vnet_classify_find_entry (vnet_classify_table_t * t, + u8 * h, u64 hash, f64 now); + +static inline vnet_classify_entry_t * +vnet_classify_find_entry_inline (vnet_classify_table_t * t, + u8 * h, u64 hash, f64 now) + { + vnet_classify_entry_t * v; + u32x4 * mask, * data, *data_start, * key; + u32x4 result __attribute__((aligned(sizeof(u32x4)))); + vnet_classify_bucket_t * b; + u32 value_index; + u32 result_mask; + u32 bucket_index; + int i; + + ASSERT ((((u64)h) & 0xf) == 0); + + data_start = (u32x4 *) h; + + bucket_index = hash & (t->nbuckets-1); + b = &t->buckets[bucket_index]; + + if (b->offset == 0) + return 0; + + hash >>= t->log2_nbuckets; + + v = vnet_classify_get_entry (t, b->offset); + value_index = hash & ((1<<b->log2_pages)-1); + + v = vnet_classify_entry_at_index (t, v, value_index); + + for (i = 0; i < t->entries_per_page; i++) + { + mask = t->mask; + data = data_start; + key = v->key; + + switch (t->match_n_vectors) + { + case 1: + result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; + break; + + case 2: + result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; + result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; + break; + + case 3: + result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; + result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; + result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2]; + break; + + case 4: + result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; + result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; + result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2]; + result |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3]; + break; + + case 5: + result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; + result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; + result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2]; + result |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3]; + result |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4]; + break; + + default: + abort(); + } + + result_mask = u32x4_zero_byte_mask (result); + if (result_mask == 0xffff) + { + if (PREDICT_TRUE(now)) + { + v->hits++; + v->last_heard = now; + } + return (v); + } + v = vnet_classify_entry_at_index (t, v, 1); + } + return 0; +} + +vnet_classify_table_t * +vnet_classify_new_table (vnet_classify_main_t *cm, + u8 * mask, u32 nbuckets, u32 memory_size, + u32 skip_n_vectors, + u32 match_n_vectors); + +int 
vnet_classify_add_del_session (vnet_classify_main_t * cm, + u32 table_index, + u8 * match, + u32 hit_next_index, + u32 opaque_index, + i32 advance, + int is_add); + +int vnet_classify_add_del_table (vnet_classify_main_t * cm, + u8 * mask, + u32 nbuckets, + u32 memory_size, + u32 skip, + u32 match, + u32 next_table_index, + u32 miss_next_index, + u32 * table_index, + int is_add); + +unformat_function_t unformat_ip4_mask; +unformat_function_t unformat_ip6_mask; +unformat_function_t unformat_l3_mask; +unformat_function_t unformat_l2_mask; +unformat_function_t unformat_classify_mask; +unformat_function_t unformat_l2_next_index; +unformat_function_t unformat_ip_next_index; +unformat_function_t unformat_ip4_match; +unformat_function_t unformat_ip6_match; +unformat_function_t unformat_l3_match; +unformat_function_t unformat_vlan_tag; +unformat_function_t unformat_l2_match; +unformat_function_t unformat_classify_match; + +#endif /* __included_vnet_classify_h__ */ diff --git a/vnet/vnet/config.c b/vnet/vnet/config.c new file mode 100644 index 00000000000..74c4caa847f --- /dev/null +++ b/vnet/vnet/config.c @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * config.c: feature configuration + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vnet/vnet.h> + +static vnet_config_feature_t * +duplicate_feature_vector (vnet_config_feature_t * feature_vector) +{ + vnet_config_feature_t * result, * f; + + result = vec_dup (feature_vector); + vec_foreach (f, result) + f->feature_config = vec_dup (f->feature_config); + + return result; +} + +static void +free_feature_vector (vnet_config_feature_t * feature_vector) +{ + vnet_config_feature_t * f; + + vec_foreach (f, feature_vector) + vnet_config_feature_free (f); + vec_free (feature_vector); +} + +static u32 +add_next (vlib_main_t * vm, + vnet_config_main_t * cm, + u32 last_node_index, + u32 this_node_index) +{ + u32 i, ni = ~0; + + if (last_node_index != ~0) + return vlib_node_add_next (vm, last_node_index, this_node_index); + + for (i = 0; i < vec_len (cm->start_node_indices); i++) + { + u32 tmp; + tmp = vlib_node_add_next (vm, cm->start_node_indices[i], this_node_index); + if (ni == ~0) + ni = tmp; + /* Start nodes to first must agree on next indices. */ + ASSERT (ni == tmp); + } + + return ni; +} + +static vnet_config_t * +find_config_with_features (vlib_main_t * vm, + vnet_config_main_t * cm, + vnet_config_feature_t * feature_vector) +{ + u32 last_node_index = ~0; + vnet_config_feature_t * f; + u32 * config_string; + uword * p; + vnet_config_t * c; + + config_string = cm->config_string_temp; + cm->config_string_temp = 0; + if (config_string) + _vec_len (config_string) = 0; + + vec_foreach (f, feature_vector) + { + /* Connect node graph. */ + f->next_index = add_next (vm, cm, last_node_index, f->node_index); + last_node_index = f->node_index; + + /* Store next index in config string. */ + vec_add1 (config_string, f->next_index); + + /* Store feature config. */ + vec_add (config_string, f->feature_config, vec_len (f->feature_config)); + } + + /* Terminate config string with next for end node. */ + if (last_node_index == ~0 || last_node_index != cm->end_node_index) + { + u32 next_index = add_next (vm, cm, last_node_index, cm->end_node_index); + vec_add1 (config_string, next_index); + } + + /* See if config string is unique. */ + p = hash_get_mem (cm->config_string_hash, config_string); + if (p) + { + /* Not unique. Share existing config. */ + cm->config_string_temp = config_string; /* we'll use it again later. */ + free_feature_vector (feature_vector); + c = pool_elt_at_index (cm->config_pool, p[0]); + } + else + { + u32 * d; + + pool_get (cm->config_pool, c); + c->index = c - cm->config_pool; + c->features = feature_vector; + c->config_string_vector = config_string; + + /* Allocate copy of config string in heap. + VLIB buffers will maintain pointers to heap as they read out + configuration data. */ + c->config_string_heap_index + = heap_alloc (cm->config_string_heap, vec_len (config_string) + 1, + c->config_string_heap_handle); + + /* First element in heap points back to pool index. */ + d = vec_elt_at_index (cm->config_string_heap, c->config_string_heap_index); + d[0] = c->index; + memcpy (d + 1, config_string, vec_bytes (config_string)); + hash_set_mem (cm->config_string_hash, config_string, c->index); + + c->reference_count = 0; /* will be incremented by caller. 
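   Both vnet_config_add_feature() and vnet_config_del_feature() increment it as soon as find_config_with_features() returns.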
*/ + } + + return c; +} + +void vnet_config_init (vlib_main_t * vm, + vnet_config_main_t * cm, + char * start_node_names[], + int n_start_node_names, + char * feature_node_names[], + int n_feature_node_names) +{ + vlib_node_t * n; + u32 i; + + memset (cm, 0, sizeof (cm[0])); + + cm->config_string_hash = hash_create_vec (0, STRUCT_SIZE_OF (vnet_config_t, config_string_vector[0]), sizeof (uword)); + + ASSERT (n_start_node_names >= 1); + ASSERT (n_feature_node_names >= 1); + + vec_resize (cm->start_node_indices, n_start_node_names); + for (i = 0; i < n_start_node_names; i++) + { + n = vlib_get_node_by_name (vm, (u8 *) start_node_names[i]); + /* Given node name must exist. */ + ASSERT (n != 0); + cm->start_node_indices[i] = n->index; + } + + vec_resize (cm->node_index_by_feature_index, n_feature_node_names); + for (i = 0; i < n_feature_node_names; i++) + { + if (! feature_node_names[i]) + cm->node_index_by_feature_index[i] = ~0; + else + { + n = vlib_get_node_by_name (vm, (u8 *) feature_node_names[i]); + /* Given node may exist in plug-in library which is not present */ + if (n) + { + if (i + 1 == n_feature_node_names) + cm->end_node_index = n->index; + cm->node_index_by_feature_index[i] = n->index; + } + else cm->node_index_by_feature_index[i] = ~0; + } + } +} + +static void +remove_reference (vnet_config_main_t * cm, vnet_config_t * c) +{ + ASSERT (c->reference_count > 0); + c->reference_count -= 1; + if (c->reference_count == 0) + { + hash_unset (cm->config_string_hash, c->config_string_vector); + vnet_config_free (cm, c); + pool_put (cm->config_pool, c); + } +} + +always_inline u32 * +vnet_get_config_heap (vnet_config_main_t * cm, u32 ci) +{ return heap_elt_at_index (cm->config_string_heap, ci); } + +u32 vnet_config_add_feature (vlib_main_t * vm, + vnet_config_main_t * cm, + u32 config_string_heap_index, + u32 feature_index, + void * feature_config, + u32 n_feature_config_bytes) +{ + vnet_config_t * old, * new; + vnet_config_feature_t * new_features, * f; + u32 n_feature_config_u32s; + u32 node_index = vec_elt (cm->node_index_by_feature_index, feature_index); + + if (node_index == ~0) // feature node does not exist + return config_string_heap_index; // return original config index + + if (config_string_heap_index == ~0) + { + old = 0; + new_features = 0; + } + else + { + u32 * p = vnet_get_config_heap (cm, config_string_heap_index); + old = pool_elt_at_index (cm->config_pool, p[-1]); + new_features = old->features; + if (new_features) + new_features = duplicate_feature_vector (new_features); + } + + vec_add2 (new_features, f, 1); + f->feature_index = feature_index; + f->node_index = node_index; + + n_feature_config_u32s = round_pow2 (n_feature_config_bytes, sizeof (f->feature_config[0])) / sizeof (f->feature_config[0]); + vec_add (f->feature_config, feature_config, n_feature_config_u32s); + + /* Sort (prioritize) features. */ + if (vec_len (new_features) > 1) + vec_sort (new_features, f1, f2, (int) f1->feature_index - f2->feature_index); + + if (old) + remove_reference (cm, old); + + new = find_config_with_features (vm, cm, new_features); + new->reference_count += 1; + + /* User gets pointer to config string first element (which defines the pool index + this config string comes from). 
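   Hence the +1 in the return value below; vnet_get_config_heap() users read p[-1] to recover the owning vnet_config_t from config_pool.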
*/ + return new->config_string_heap_index + 1; +} + +u32 vnet_config_del_feature (vlib_main_t * vm, + vnet_config_main_t * cm, + u32 config_string_heap_index, + u32 feature_index, + void * feature_config, + u32 n_feature_config_bytes) +{ + vnet_config_t * old, * new; + vnet_config_feature_t * new_features, * f; + u32 n_feature_config_u32s; + + { + u32 * p = vnet_get_config_heap (cm, config_string_heap_index); + + old = pool_elt_at_index (cm->config_pool, p[-1]); + } + + n_feature_config_u32s = round_pow2 (n_feature_config_bytes, sizeof (f->feature_config[0])) / sizeof (f->feature_config[0]); + + /* Find feature with same index and opaque data. */ + vec_foreach (f, old->features) + { + if (f->feature_index == feature_index + && vec_len (f->feature_config) == n_feature_config_u32s + && (n_feature_config_u32s == 0 + || ! memcmp (f->feature_config, feature_config, n_feature_config_bytes))) + break; + } + + /* Feature not found. */ + if (f >= vec_end (old->features)) + return config_string_heap_index; // return original config index + + new_features = duplicate_feature_vector (old->features); + f = new_features + (f - old->features); + vnet_config_feature_free (f); + vec_delete (new_features, 1, f - new_features); + + /* must remove old from config_pool now as it may be expanded and change + memory location if the following function find_config_with_features() + adds a new config because none of existing config's has matching features + and so can be reused */ + remove_reference (cm, old); + new = find_config_with_features (vm, cm, new_features); + new->reference_count += 1; + + return new->config_string_heap_index + 1; +} diff --git a/vnet/vnet/config.h b/vnet/vnet/config.h new file mode 100644 index 00000000000..1ace30fef88 --- /dev/null +++ b/vnet/vnet/config.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * config.h: feature configuration + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_config_h +#define included_vnet_config_h + +#include <vlib/vlib.h> +#include <vppinfra/heap.h> + +typedef struct { + /* Features are prioritized by index. Smaller indices get + performed first. */ + u32 feature_index; + + /* VLIB node which performs feature. */ + u32 node_index; + + /* Next index relative to previous node or main node. */ + u32 next_index; + + /* Opaque per feature configuration data. */ + u32 * feature_config; +} vnet_config_feature_t; + +always_inline void +vnet_config_feature_free (vnet_config_feature_t * f) +{ vec_free (f->feature_config); } + +typedef struct { + /* Sorted vector of features for this configuration. */ + vnet_config_feature_t * features; + + /* Config string as vector for hashing. */ + u32 * config_string_vector; + + /* Config string including all next indices and feature data as a vector. */ + u32 config_string_heap_index, config_string_heap_handle; + + /* Index in main pool. */ + u32 index; + + /* Number of interfaces/traffic classes that reference this config. */ + u32 reference_count; +} vnet_config_t; + +typedef struct { + /* Pool of configs. Index 0 is always null config and is never deleted. */ + vnet_config_t * config_pool; + + /* Hash table mapping vector config string to config pool index. */ + uword * config_string_hash; + + /* Global heap of configuration data. */ + u32 * config_string_heap; + + /* Node index which starts/ends feature processing. */ + u32 * start_node_indices, end_node_index; + + /* Interior feature processing nodes (not including start and end nodes). */ + u32 * node_index_by_feature_index; + + /* Temporary vector for holding config strings. Used to avoid continually + allocating vectors. */ + u32 * config_string_temp; +} vnet_config_main_t; + +always_inline void +vnet_config_free (vnet_config_main_t * cm, vnet_config_t * c) +{ + vnet_config_feature_t * f; + vec_foreach (f, c->features) + vnet_config_feature_free (f); + vec_free (c->features); + heap_dealloc (cm->config_string_heap, c->config_string_heap_handle); + vec_free (c->config_string_vector); +} + +always_inline void * +vnet_get_config_data (vnet_config_main_t * cm, + u32 * config_index, + u32 * next_index, + u32 n_data_bytes) +{ + u32 i, n, * d; + + i = *config_index; + + d = heap_elt_at_index (cm->config_string_heap, i); + + n = round_pow2 (n_data_bytes, sizeof (d[0])) / sizeof (d[0]); + + /* Last 32 bits are next index. */ + *next_index = d[n]; + + /* Advance config index to next config. */ + *config_index = (i + n + 1); + + /* Return config data to user for this feature. */ + return (void *) d; +} + +void vnet_config_init (vlib_main_t * vm, + vnet_config_main_t * cm, + char * start_node_names[], + int n_start_node_names, + char * feature_node_names[], + int n_feature_node_names); + +/* Calls to add/delete features from configurations. 
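   Both take and return a config_id, i.e. config_string_heap_index + 1, which the caller stores per interface or traffic class.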
*/ +u32 vnet_config_add_feature (vlib_main_t * vm, + vnet_config_main_t * cm, + u32 config_id, + u32 feature_index, + void * feature_config, + u32 n_feature_config_bytes); + +u32 vnet_config_del_feature (vlib_main_t * vm, + vnet_config_main_t * cm, + u32 config_id, + u32 feature_index, + void * feature_config, + u32 n_feature_config_bytes); + +#endif /* included_vnet_config_h */ diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c new file mode 100644 index 00000000000..c27dbfabfc0 --- /dev/null +++ b/vnet/vnet/devices/dpdk/cli.c @@ -0,0 +1,974 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/xxhash.h> + +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/dpdk/dpdk.h> +#include <vnet/classify/vnet_classify.h> +#include <vnet/mpls-gre/packet.h> + +#include "dpdk_priv.h" + +frame_queue_trace_t *frame_queue_traces; + +static clib_error_t * +pcap_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dpdk_main_t * dm = &dpdk_main; + u8 * filename; + u32 max; + int matched = 0; + clib_error_t * error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on")) + { + if (dm->tx_pcap_enable == 0) + { + if (dm->pcap_filename == 0) + dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); + + memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); + dm->pcap_main.file_name = (char *) dm->pcap_filename; + dm->pcap_main.n_packets_to_capture = 100; + if (dm->pcap_pkts_to_capture) + dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; + + dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; + dm->tx_pcap_enable = 1; + matched = 1; + vlib_cli_output (vm, "pcap tx capture on..."); + } + else + { + vlib_cli_output (vm, "pcap tx capture already on..."); + } + matched = 1; + } + else if (unformat (input, "off")) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, "captured %d pkts...", + dm->pcap_main.n_packets_captured+1); + if (dm->pcap_main.n_packets_captured) + { + dm->pcap_main.n_packets_to_capture = + dm->pcap_main.n_packets_captured; + error = pcap_write (&dm->pcap_main); + if (error) + clib_error_report (error); + else + vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); + } + } + else + { + vlib_cli_output (vm, "pcap tx capture already off..."); + } + + dm->tx_pcap_enable = 0; + matched = 1; + } + else if (unformat (input, "max %d", &max)) + { + dm->pcap_pkts_to_capture = max; + matched = 1; + } + + else if (unformat (input, "intfc %U", + unformat_vnet_sw_interface, dm->vnet_main, + &dm->pcap_sw_if_index)) + matched = 1; + else if (unformat (input, "intfc any")) + { + dm->pcap_sw_if_index = 0; + matched = 1; + } + else if (unformat (input, "file %s", &filename)) + { + u8 * chroot_filename; + /* Brain-police user path input */ + if (strstr((char *)filename, "..") || 
index((char *)filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + continue; + } + + chroot_filename = format (0, "/tmp/%s%c", filename, 0); + vec_free (filename); + + if (dm->pcap_filename) + vec_free (dm->pcap_filename); + vec_add1 (filename, 0); + dm->pcap_filename = chroot_filename; + matched = 1; + } + else if (unformat (input, "status")) + { + if (dm->tx_pcap_enable == 0) + { + vlib_cli_output (vm, "pcap tx capture is off..."); + continue; + } + + vlib_cli_output (vm, "pcap tx capture: %d of %d pkts...", + dm->pcap_main.n_packets_captured, + dm->pcap_main.n_packets_to_capture); + matched = 1; + } + + else + break; + } + + if (matched == 0) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + return 0; +} + +VLIB_CLI_COMMAND (pcap_trace_command, static) = { + .path = "pcap tx trace", + .short_help = + "pcap tx trace on off max <nn> intfc <intfc> file <name> status", + .function = pcap_trace_command_fn, +}; + + +static clib_error_t * +show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + struct rte_mempool * rmp; + int i; + + for(i = 0; i < vec_len(vm->buffer_main->pktmbuf_pools); i++) + { + rmp = vm->buffer_main->pktmbuf_pools[i]; + if (rmp) + { + unsigned count = rte_mempool_count(rmp); + unsigned free_count = rte_mempool_free_count(rmp); + + vlib_cli_output(vm, "name=\"%s\" available = %7d allocated = %7d total = %7d\n", + rmp->name, (u32)count, (u32)free_count, + (u32)(count+free_count)); + } + else + { + vlib_cli_output(vm, "rte_mempool is NULL (!)\n"); + } + } + return 0; +} + +VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { + .path = "show dpdk buffer", + .short_help = "show dpdk buffer state", + .function = show_dpdk_buffer, +}; + +static clib_error_t * +test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + static u32 * allocated_buffers; + u32 n_alloc = 0; + u32 n_free = 0; + u32 first, actual_alloc; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "allocate %d", &n_alloc)) + ; + else if (unformat (input, "free %d", &n_free)) + ; + else + break; + } + + if (n_free) + { + if (vec_len (allocated_buffers) < n_free) + return clib_error_return (0, "Can't free %d, only %d allocated", + n_free, vec_len (allocated_buffers)); + + first = vec_len(allocated_buffers) - n_free; + vlib_buffer_free (vm, allocated_buffers + first, n_free); + _vec_len (allocated_buffers) = first; + } + if (n_alloc) + { + first = vec_len (allocated_buffers); + vec_validate (allocated_buffers, + vec_len (allocated_buffers) + n_alloc - 1); + + actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first, + n_alloc); + _vec_len (allocated_buffers) = first + actual_alloc; + + if (actual_alloc < n_alloc) + vlib_cli_output (vm, "WARNING: only allocated %d buffers", + actual_alloc); + } + + vlib_cli_output (vm, "Currently %d buffers allocated", + vec_len (allocated_buffers)); + + if (allocated_buffers && vec_len(allocated_buffers) == 0) + vec_free(allocated_buffers); + + return 0; +} + +VLIB_CLI_COMMAND (cmd_test_dpdk_bufferr,static) = { + .path = "test dpdk buffer", + .short_help = "test dpdk buffer [allocate <nn>][free <nn>]", + .function = test_dpdk_buffer, +}; + +static void +show_dpdk_device_stats (vlib_main_t * vm, dpdk_device_t * xd) +{ + vlib_cli_output(vm, + "device_index %d\n" + " last_burst_sz %d\n" + " max_burst_sz %d\n" + " full_frames_cnt %u\n" + " consec_full_frames_cnt %u\n" + " congestion_cnt 
%d\n" + " last_poll_time %llu\n" + " max_poll_delay %llu\n" + " discard_cnt %u\n" + " total_packet_cnt %u\n", + xd->device_index, + xd->efd_agent.last_burst_sz, + xd->efd_agent.max_burst_sz, + xd->efd_agent.full_frames_cnt, + xd->efd_agent.consec_full_frames_cnt, + xd->efd_agent.congestion_cnt, + xd->efd_agent.last_poll_time, + xd->efd_agent.max_poll_delay, + xd->efd_agent.discard_cnt, + xd->efd_agent.total_packet_cnt); + + u32 device_queue_sz = rte_eth_rx_queue_count(xd->device_index, + 0 /* queue_id */); + vlib_cli_output(vm, + " device_queue_sz %u\n", + device_queue_sz); +} + + +/* + * Trigger threads to grab frame queue trace data + */ +static clib_error_t * +trace_frame_queue (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + clib_error_t * error = NULL; + frame_queue_trace_t *fqt; + u32 num_fq; + u32 fqix; + u32 enable = 0; + + if (unformat(input, "on")) { + enable = 1; + } else if (unformat(input, "off")) { + enable = 0; + } else { + return clib_error_return(0, "expecting on or off"); + } + + num_fq = vec_len(vlib_frame_queues); + if (num_fq == 0) + { + vlib_cli_output(vm, "No frame queues exist\n"); + return error; + } + + // Allocate storage for trace if necessary + vec_validate_aligned(frame_queue_traces, num_fq-1, CLIB_CACHE_LINE_BYTES); + + for (fqix=0; fqix<num_fq; fqix++) { + fqt = &frame_queue_traces[fqix]; + + memset(fqt->n_vectors, 0xff, sizeof(fqt->n_vectors)); + fqt->written = 0; + vlib_frame_queues[fqix]->trace = enable; + } + return error; +} + +VLIB_CLI_COMMAND (cmd_trace_frame_queue,static) = { + .path = "trace frame-queue", + .short_help = "trace frame-queue (on|off)", + .function = trace_frame_queue, +}; + + +/* + * Display frame queue trace data gathered by threads. + */ +static clib_error_t * +show_frame_queue (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + clib_error_t * error = NULL; + frame_queue_trace_t *fqt; + u32 num_fq; + u32 fqix; + + num_fq = vec_len(frame_queue_traces); + if (num_fq == 0) + { + vlib_cli_output(vm, "No trace data for frame queues\n"); + return error; + } + + for (fqix=0; fqix<num_fq; fqix++) { + fqt = &frame_queue_traces[fqix]; + + vlib_cli_output(vm, "Thread %d %v\n", fqix, vlib_worker_threads[fqix].name); + + if (fqt->written == 0) + { + vlib_cli_output(vm, " no trace data\n"); + continue; + } + + vlib_cli_output(vm, " vector-threshold %d ring size %d in use %d\n", + fqt->threshold, fqt->nelts, fqt->n_in_use); + vlib_cli_output(vm, " head %12d head_hint %12d tail %12d\n", + fqt->head, fqt->head_hint, fqt->tail); + vlib_cli_output(vm, " %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n", + fqt->n_vectors[0], fqt->n_vectors[1], fqt->n_vectors[2], fqt->n_vectors[3], + fqt->n_vectors[4], fqt->n_vectors[5], fqt->n_vectors[6], fqt->n_vectors[7], + fqt->n_vectors[8], fqt->n_vectors[9], fqt->n_vectors[10], fqt->n_vectors[11], + fqt->n_vectors[12], fqt->n_vectors[13], fqt->n_vectors[14], fqt->n_vectors[15]); + + if (fqt->nelts > 16) + { + vlib_cli_output(vm, " %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n", + fqt->n_vectors[16], fqt->n_vectors[17], fqt->n_vectors[18], fqt->n_vectors[19], + fqt->n_vectors[20], fqt->n_vectors[21], fqt->n_vectors[22], fqt->n_vectors[23], + fqt->n_vectors[24], fqt->n_vectors[25], fqt->n_vectors[26], fqt->n_vectors[27], + fqt->n_vectors[28], fqt->n_vectors[29], fqt->n_vectors[30], fqt->n_vectors[31]); + } + } + return error; +} + +VLIB_CLI_COMMAND (cmd_show_frame_queue,static) = { + .path = "show frame-queue", + .short_help = 
"show frame-queue trace", + .function = show_frame_queue, +}; + + +/* + * Modify the number of elements on the frame_queues + */ +static clib_error_t * +test_frame_queue_nelts (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + clib_error_t * error = NULL; + u32 num_fq; + u32 fqix; + u32 nelts = 0; + + unformat(input, "%d", &nelts); + if ((nelts != 4) && (nelts != 8) && (nelts != 16) && (nelts != 32)) { + return clib_error_return(0, "expecting 4,8,16,32"); + } + + num_fq = vec_len(vlib_frame_queues); + if (num_fq == 0) + { + vlib_cli_output(vm, "No frame queues exist\n"); + return error; + } + + for (fqix=0; fqix<num_fq; fqix++) { + vlib_frame_queues[fqix]->nelts = nelts; + } + + return error; +} + +VLIB_CLI_COMMAND (cmd_test_frame_queue_nelts,static) = { + .path = "test frame-queue nelts", + .short_help = "test frame-queue nelts (4,8,16,32)", + .function = test_frame_queue_nelts, +}; + + +/* + * Modify the max number of packets pulled off the frame queues + */ +static clib_error_t * +test_frame_queue_threshold (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + clib_error_t * error = NULL; + u32 num_fq; + u32 fqix; + u32 threshold = 0; + + if (unformat(input, "%d", &threshold)) { + } else { + vlib_cli_output(vm, "expecting threshold value\n"); + return error; + } + + if (threshold == 0) + threshold = ~0; + + num_fq = vec_len(vlib_frame_queues); + if (num_fq == 0) + { + vlib_cli_output(vm, "No frame queues exist\n"); + return error; + } + + for (fqix=0; fqix<num_fq; fqix++) { + vlib_frame_queues[fqix]->vector_threshold = threshold; + } + + return error; +} + +VLIB_CLI_COMMAND (cmd_test_frame_queue_threshold,static) = { + .path = "test frame-queue threshold", + .short_help = "test frame-queue threshold N (0=no limit)", + .function = test_frame_queue_threshold, +}; + +static void +show_efd_config (vlib_main_t * vm) +{ + vlib_thread_main_t * tm = vlib_get_thread_main(); + dpdk_main_t * dm = &dpdk_main; + + vlib_cli_output(vm, + "dpdk: (0x%04x) enabled:%d monitor:%d drop_all:%d\n" + " dpdk_queue_hi_thresh %d\n" + " consec_full_frames_hi_thresh %d\n" + "---------\n" + "worker: (0x%04x) enabled:%d monitor:%d\n" + " worker_queue_hi_thresh %d\n", + dm->efd.enabled, + ((dm->efd.enabled & DPDK_EFD_DISCARD_ENABLED) ? 1:0), + ((dm->efd.enabled & DPDK_EFD_MONITOR_ENABLED) ? 1:0), + ((dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED) ? 1:0), + dm->efd.queue_hi_thresh, + dm->efd.consec_full_frames_hi_thresh, + tm->efd.enabled, + ((tm->efd.enabled & VLIB_EFD_DISCARD_ENABLED) ? 1:0), + ((dm->efd.enabled & VLIB_EFD_MONITOR_ENABLED) ? 
1:0), + tm->efd.queue_hi_thresh); + vlib_cli_output(vm, + "---------\n" + "ip_prec_bitmap 0x%02x\n" + "mpls_exp_bitmap 0x%02x\n" + "vlan_cos_bitmap 0x%02x\n", + tm->efd.ip_prec_bitmap, + tm->efd.mpls_exp_bitmap, + tm->efd.vlan_cos_bitmap); +} + +static clib_error_t * +show_efd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + + if (unformat(input, "config")) { + show_efd_config(vm); + } else if (unformat(input, "dpdk")) { + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + u32 device_id = ~0; + + unformat(input, "device %d", &device_id); + vec_foreach (xd, dm->devices) { + if ((xd->device_index == device_id) || (device_id == ~0)) { + show_dpdk_device_stats(vm, xd); + } + } + } else if (unformat(input, "worker")) { + vlib_thread_main_t * tm = vlib_get_thread_main(); + vlib_frame_queue_t *fq; + vlib_thread_registration_t * tr; + int thread_id; + u32 num_workers = 0; + u32 first_worker_index = 0; + uword * p; + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + ASSERT (p); + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + num_workers = tr->count; + first_worker_index = tr->first_index; + } + + vlib_cli_output(vm, + "num_workers %d\n" + "first_worker_index %d\n" + "vlib_frame_queues[%d]:\n", + num_workers, + first_worker_index, + tm->n_vlib_mains); + + for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++) { + fq = vlib_frame_queues[thread_id]; + if (fq) { + vlib_cli_output(vm, + "%2d: frames_queued %u\n" + " frames_queued_hint %u\n" + " enqueue_full_events %u\n" + " enqueue_efd_discards %u\n", + thread_id, + (fq->tail - fq->head), + (fq->tail - fq->head_hint), + fq->enqueue_full_events, + fq->enqueue_efd_discards); + } + } + } else if (unformat(input, "help")) { + vlib_cli_output(vm, "Usage: show efd config | " + "dpdk [device <id>] | worker\n"); + } else { + show_efd_config(vm); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_efd_command, static) = { + .path = "show efd", + .short_help = "Show efd [device <id>] | [config]", + .function = show_efd, +}; + +static clib_error_t * +clear_efd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + vlib_thread_main_t * tm = vlib_get_thread_main(); + vlib_frame_queue_t *fq; + int thread_id; + + vec_foreach (xd, dm->devices) { + xd->efd_agent.last_burst_sz = 0; + xd->efd_agent.max_burst_sz = 0; + xd->efd_agent.full_frames_cnt = 0; + xd->efd_agent.consec_full_frames_cnt = 0; + xd->efd_agent.congestion_cnt = 0; + xd->efd_agent.last_poll_time = 0; + xd->efd_agent.max_poll_delay = 0; + xd->efd_agent.discard_cnt = 0; + xd->efd_agent.total_packet_cnt = 0; + } + + for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++) { + fq = vlib_frame_queues[thread_id]; + if (fq) { + fq->enqueue_full_events = 0; + fq->enqueue_efd_discards = 0; + } + } + + return 0; +} + +VLIB_CLI_COMMAND (clear_efd_command,static) = { + .path = "clear efd", + .short_help = "Clear early-fast-discard counters", + .function = clear_efd, +}; + +static clib_error_t * +parse_op_and_prec (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd, + char *prec_type, u8 *prec_bitmap) +{ + clib_error_t * error = NULL; + u8 op = 0; + u8 prec = 0; + + if (unformat(input, "ge")) { + op = EFD_OPERATION_GREATER_OR_EQUAL; + } else if (unformat(input, "lt")) { + op = EFD_OPERATION_LESS_THAN; + } else if (unformat(input, "help")) { + vlib_cli_output(vm, + "enter operation [ge | lt] and precedence <0-7>)"); + return (error); + } else { + 
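+        /* Neither "ge", "lt" nor "help" was given: reject the input below. */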
return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + + if (unformat (input, "%u", &prec)) { + if (prec > 7) { + return clib_error_return(0, "precedence %d is out of range <0-7>", + prec); + } + } else { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + + set_efd_bitmap(prec_bitmap, prec, op); + + vlib_cli_output(vm, + "EFD will be set for %s precedence %s%u%s.", + prec_type, + (op == EFD_OPERATION_LESS_THAN) ? "less than " : "", + prec, + (op == EFD_OPERATION_GREATER_OR_EQUAL) ? " and greater" : ""); + + return (error); +} + + +static clib_error_t * +set_efd (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + dpdk_main_t * dm = &dpdk_main; + vlib_thread_main_t * tm = vlib_get_thread_main(); + clib_error_t * error = NULL; + + if (unformat(input, "enable")) { + if (unformat(input, "dpdk")) { + dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED; + } else if (unformat(input, "worker")) { + tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED; + } else if (unformat(input, "monitor")) { + dm->efd.enabled |= DPDK_EFD_MONITOR_ENABLED; + tm->efd.enabled |= VLIB_EFD_MONITOR_ENABLED; + } else if (unformat(input, "drop_all")) { + dm->efd.enabled |= DPDK_EFD_DROPALL_ENABLED; + } else if (unformat(input, "default")) { + dm->efd.enabled = (DPDK_EFD_DISCARD_ENABLED | + DPDK_EFD_MONITOR_ENABLED); + tm->efd.enabled = (VLIB_EFD_DISCARD_ENABLED | + VLIB_EFD_MONITOR_ENABLED); + } else { + return clib_error_return(0, "Usage: set efd enable [dpdk | " + "worker | monitor | drop_all | default]"); + } + } else if (unformat(input, "disable")) { + if (unformat(input, "dpdk")) { + dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED; + } else if (unformat(input, "worker")) { + tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED; + } else if (unformat(input, "monitor")) { + dm->efd.enabled &= ~DPDK_EFD_MONITOR_ENABLED; + tm->efd.enabled &= ~VLIB_EFD_MONITOR_ENABLED; + } else if (unformat(input, "drop_all")) { + dm->efd.enabled &= ~DPDK_EFD_DROPALL_ENABLED; + } else if (unformat(input, "all")) { + dm->efd.enabled = 0; + tm->efd.enabled = 0; + } else { + return clib_error_return(0, "Usage: set efd disable [dpdk | " + "worker | monitor | drop_all | all]"); + } + } else if (unformat(input, "worker_queue_hi_thresh")) { + u32 mark; + if (unformat (input, "%u", &mark)) { + tm->efd.queue_hi_thresh = mark; + } else { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + } else if (unformat(input, "dpdk_device_hi_thresh")) { + u32 thresh; + if (unformat (input, "%u", &thresh)) { + dm->efd.queue_hi_thresh = thresh; + } else { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + } else if (unformat(input, "consec_full_frames_hi_thresh")) { + u32 thresh; + if (unformat (input, "%u", &thresh)) { + dm->efd.consec_full_frames_hi_thresh = thresh; + } else { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + } else if (unformat(input, "ip-prec")) { + return (parse_op_and_prec(vm, input, cmd, + "ip", &tm->efd.ip_prec_bitmap)); + } else if (unformat(input, "mpls-exp")) { + return (parse_op_and_prec(vm, input, cmd, + "mpls", &tm->efd.mpls_exp_bitmap)); + } else if (unformat(input, "vlan-cos")) { + return (parse_op_and_prec(vm, input, cmd, + "vlan", &tm->efd.vlan_cos_bitmap)); + } else if (unformat(input, "help")) { + vlib_cli_output(vm, + "Usage:\n" + " set efd enable <dpdk | worker | monitor | drop_all | default> |\n" + " set efd disable <dpdk | worker | 
monitor | drop_all | all> |\n" + " set efd <ip-prec | mpls-exp | vlan-cos> <ge | lt> <0-7>\n" + " set efd worker_queue_hi_thresh <0-32> |\n" + " set efd dpdk_device_hi_thresh <0-%d> |\n" + " set efd consec_full_frames_hi_thresh <count> |\n", + DPDK_NB_RX_DESC_10GE); + } else { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + + return error; +} + +VLIB_CLI_COMMAND (cmd_set_efd,static) = { + .path = "set efd", + .short_help = "set early-fast-discard commands", + .function = set_efd, +}; + +static clib_error_t * +set_dpdk_if_desc (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + dpdk_main_t * dm = &dpdk_main; + vnet_hw_interface_t * hw; + dpdk_device_t * xd; + u32 hw_if_index = (u32) ~0; + u32 nb_rx_desc = (u32) ~0; + u32 nb_tx_desc = (u32) ~0; + clib_error_t * rv; + + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "tx %d", &nb_tx_desc)) + ; + else if (unformat (line_input, "rx %d", &nb_rx_desc)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (hw_if_index == (u32) ~0) + return clib_error_return (0, "please specify valid interface name"); + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (xd->dev_type != VNET_DPDK_DEV_ETH) + return clib_error_return (0, "number of descriptors can be set only for " + "physical devices"); + + if ((nb_rx_desc == (u32) ~0 || nb_rx_desc == xd->nb_rx_desc) && + (nb_tx_desc == (u32) ~0 || nb_tx_desc == xd->nb_tx_desc)) + return clib_error_return (0, "nothing changed"); + + if (nb_rx_desc != (u32) ~0) + xd->nb_rx_desc = nb_rx_desc; + + if (nb_tx_desc != (u32) ~0) + xd->nb_rx_desc = nb_rx_desc; + + rv = dpdk_port_setup(dm, xd); + + return rv < 0 ? 
rv : 0; +} + +VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { + .path = "set dpdk interface descriptors", + .short_help = "set dpdk interface descriptors <if-name> [rx <n>] [tx <n>]", + .function = set_dpdk_if_desc, +}; + +static clib_error_t * +show_dpdk_if_placement (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vlib_thread_main_t * tm = vlib_get_thread_main(); + dpdk_main_t * dm = &dpdk_main; + dpdk_device_and_queue_t * dq; + int cpu; + + if (tm->n_vlib_mains == 1) + vlib_cli_output(vm, "All interfaces are handled by main thread"); + + for(cpu = 0; cpu < vec_len(dm->devices_by_cpu); cpu++) + { + if (vec_len(dm->devices_by_cpu[cpu])) + vlib_cli_output(vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].dpdk_lcore_id); + + vec_foreach(dq, dm->devices_by_cpu[cpu]) + { + u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; + vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index); + vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id); + } + } + return 0; +} + +VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { + .path = "show dpdk interface placement", + .short_help = "show dpdk interface placement", + .function = show_dpdk_if_placement, +}; + +static int +dpdk_device_queue_sort(void * a1, void * a2) +{ + dpdk_device_and_queue_t * dq1 = a1; + dpdk_device_and_queue_t * dq2 = a2; + + if (dq1->device > dq2->device) + return 1; + else if (dq1->device < dq2->device) + return -1; + else if (dq1->queue_id > dq2->queue_id) + return 1; + else if (dq1->queue_id < dq2->queue_id) + return -1; + else + return 0; +} + +static clib_error_t * +set_dpdk_if_placement (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + dpdk_main_t * dm = &dpdk_main; + dpdk_device_and_queue_t * dq; + vnet_hw_interface_t * hw; + dpdk_device_t * xd; + u32 hw_if_index = (u32) ~0; + u32 queue = (u32) 0; + u32 cpu = (u32) ~0; + int i; + + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (hw_if_index == (u32) ~0) + return clib_error_return (0, "please specify valid interface name"); + + if (cpu < dm->input_cpu_first_index || + cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) + return clib_error_return (0, "please specify valid thread id"); + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + for(i = 0; i < vec_len(dm->devices_by_cpu); i++) + { + vec_foreach(dq, dm->devices_by_cpu[i]) + { + if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && + queue == dq->queue_id) + { + if (cpu == i) /* nothing to do */ + return 0; + + vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->queue_id = queue; + dq->device = xd->device_index; + xd->cpu_socket_id_by_queue[queue] = + rte_lcore_to_socket_id(vlib_worker_threads[cpu].dpdk_lcore_id); + + vec_sort_with_function(dm->devices_by_cpu[i], + dpdk_device_queue_sort); + + vec_sort_with_function(dm->devices_by_cpu[cpu], + dpdk_device_queue_sort); + + if (vec_len(dm->devices_by_cpu[i]) == 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_DISABLED); + + if (vec_len(dm->devices_by_cpu[cpu]) == 1) + vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + + return 0; + } + } + } + + return clib_error_return (0, "not found"); +} + +VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { + .path = "set dpdk interface placement", + .short_help = "set dpdk interface placement <if-name> [queue <n>] thread <n>", + .function = set_dpdk_if_placement, +}; + +clib_error_t * +dpdk_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_cli_init); diff --git a/vnet/vnet/devices/dpdk/device.c b/vnet/vnet/devices/dpdk/device.c new file mode 100644 index 00000000000..a19c3131ef9 --- /dev/null +++ b/vnet/vnet/devices/dpdk/device.c @@ -0,0 +1,1483 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/format.h> +#include <vlib/unix/cj.h> + +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/dpdk/dpdk.h> + +#include "dpdk_priv.h" +#include <vppinfra/error.h> + +#define foreach_dpdk_tx_func_error \ + _(BAD_RETVAL, "DPDK tx function returned an error") \ + _(RING_FULL, "Tx packet drops (ring full)") \ + _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \ + _(REPL_FAIL, "Tx packet drops (replication failure)") + +typedef enum { +#define _(f,s) DPDK_TX_FUNC_ERROR_##f, + foreach_dpdk_tx_func_error +#undef _ + DPDK_TX_FUNC_N_ERROR, +} dpdk_tx_func_error_t; + +static char * dpdk_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_tx_func_error +#undef _ +}; + +static struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_buffer_main_t * bm = vm->buffer_main; + struct rte_mbuf * first_mb = 0, * new_mb, * pkt_mb, ** prev_mb_next = 0; + u8 nb_segs, nb_segs_left; + u32 copy_bytes; + unsigned socket_id = rte_socket_id(); + + ASSERT (bm->pktmbuf_pools[socket_id]); + pkt_mb = ((struct rte_mbuf *)b)-1; + nb_segs = pkt_mb->nb_segs; + for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--) + { + if (PREDICT_FALSE(pkt_mb == 0)) + { + clib_warning ("Missing %d mbuf chain segment(s): " + "(nb_segs = %d, nb_segs_left = %d)!", + nb_segs - nb_segs_left, nb_segs, nb_segs_left); + if (first_mb) + rte_pktmbuf_free(first_mb); + return NULL; + } + new_mb = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]); + if (PREDICT_FALSE(new_mb == 0)) + { + if (first_mb) + rte_pktmbuf_free(first_mb); + return NULL; + } + + /* + * Copy packet info into 1st segment. + */ + if (first_mb == 0) + { + first_mb = new_mb; + rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len; + first_mb->nb_segs = pkt_mb->nb_segs; + first_mb->port = pkt_mb->port; +#ifdef DAW_FIXME // TX Offload support TBD + first_mb->vlan_macip = pkt_mb->vlan_macip; + first_mb->hash = pkt_mb->hash; + first_mb->ol_flags = pkt_mb->ol_flags +#endif + } + else + { + ASSERT(prev_mb_next != 0); + *prev_mb_next = new_mb; + } + + /* + * Copy packet segment data into new mbuf segment. + */ + rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len; + copy_bytes = pkt_mb->data_len + RTE_PKTMBUF_HEADROOM; + ASSERT(copy_bytes <= pkt_mb->buf_len); + memcpy(new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes); + + prev_mb_next = &new_mb->next; + pkt_mb = pkt_mb->next; + } + + ASSERT(pkt_mb == 0); + __rte_mbuf_sanity_check(first_mb, 1); + + return first_mb; +} + +typedef struct { + u32 buffer_index; + u16 device_index; + u8 queue_index; + struct rte_mbuf mb; + /* Copy of VLIB buffer; packet data stored in pre_data. */ + vlib_buffer_t buffer; +} dpdk_tx_dma_trace_t; + +static void +dpdk_tx_trace_buffer (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, + u32 buffer_index, + vlib_buffer_t * buffer) +{ + vlib_main_t * vm = vlib_get_main(); + dpdk_tx_dma_trace_t * t0; + struct rte_mbuf * mb; + + mb = ((struct rte_mbuf *)buffer)-1; + + t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = buffer_index; + memcpy (&t0->mb, mb, sizeof (t0->mb)); + memcpy (&t0->buffer, buffer, sizeof (buffer[0]) - sizeof (buffer->pre_data)); + memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data, + sizeof (t0->buffer.pre_data)); +} + +/* + * This function calls the dpdk's tx_burst function to transmit the packets + * on the tx_vector. 
It manages a lock per-device if the device does not + * support multiple queues. It returns the number of packets untransmitted + * on the tx_vector. If all packets are transmitted (the normal case), the + * function returns 0. + * + * The tx_burst function may not be able to transmit all packets because the + * dpdk ring is full. If a flowcontrol callback function has been configured + * then the function simply returns. If no callback has been configured, the + * function will retry calling tx_burst with the remaining packets. This will + * continue until all packets are transmitted or tx_burst indicates no packets + * could be transmitted. (The caller can drop the remaining packets.) + * + * The function assumes there is at least one packet on the tx_vector. + */ +static_always_inline +u32 tx_burst_vector_internal (vlib_main_t * vm, + dpdk_device_t * xd, + struct rte_mbuf ** tx_vector) +{ + dpdk_main_t * dm = &dpdk_main; + u32 n_packets; + u32 tx_head; + u32 tx_tail; + u32 n_retry; + int rv; + int queue_id; + tx_ring_hdr_t *ring; + + ring = vec_header(tx_vector, sizeof(*ring)); + + n_packets = ring->tx_head - ring->tx_tail; + + tx_head = ring->tx_head % DPDK_TX_RING_SIZE; + + /* + * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to + * unpredictable results. + */ + ASSERT(n_packets > 0); + + /* + * Check for tx_vector overflow. If this fails it is a system configuration + * error. The ring should be sized big enough to handle the largest un-flowed + * off burst from a traffic manager. A larger size also helps performance + * a bit because it decreases the probability of having to issue two tx_burst + * calls due to a ring wrap. + */ + ASSERT(n_packets < DPDK_TX_RING_SIZE); + + /* + * If there is no flowcontrol callback, there is only temporary buffering + * on the tx_vector and so the tail should always be 0. + */ + ASSERT(dm->flowcontrol_callback || ring->tx_tail == 0); + + /* + * If there is a flowcontrol callback, don't retry any incomplete tx_bursts. + * Apply backpressure instead. If there is no callback, keep retrying until + * a tx_burst sends no packets. n_retry of 255 essentially means no retry + * limit. + */ + n_retry = dm->flowcontrol_callback ? 0 : 255; + + queue_id = vm->cpu_index; + + do { + /* start the burst at the tail */ + tx_tail = ring->tx_tail % DPDK_TX_RING_SIZE; + + /* + * This device only supports one TX queue, + * and we're running multi-threaded... + */ + if (PREDICT_FALSE(xd->lockp != 0)) + { + queue_id = 0; + while (__sync_lock_test_and_set (xd->lockp, 1)) + /* zzzz */; + } + + if (PREDICT_TRUE(xd->dev_type == VNET_DPDK_DEV_ETH)) + { + if (PREDICT_TRUE(tx_head > tx_tail)) + { + /* no wrap, transmit in one burst */ + rv = rte_eth_tx_burst(xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (tx_head-tx_tail)); + } + else + { + /* + * This can only happen if there is a flowcontrol callback. + * We need to split the transmit into two calls: one for + * the packets up to the wrap point, and one to continue + * at the start of the ring. + * Transmit pkts up to the wrap point. + */ + rv = rte_eth_tx_burst(xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (DPDK_TX_RING_SIZE - tx_tail)); + + /* + * If we transmitted everything we wanted, then allow 1 retry + * so we can try to transmit the rest. If we didn't transmit + * everything, stop now. + */ + n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 
1 : 0; + } + } + else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + { + if (PREDICT_TRUE(tx_head > tx_tail)) + { + /* no wrap, transmit in one burst */ + rv = rte_vhost_enqueue_burst(&xd->vu_vhost_dev, VIRTIO_RXQ, + &tx_vector[tx_tail], + (uint16_t) (tx_head-tx_tail)); + if (PREDICT_TRUE(rv > 0)) + { + if (dpdk_vhost_user_want_interrupt(xd, VIRTIO_RXQ)) { + dpdk_vu_vring *vring = &(xd->vu_intf->vrings[VIRTIO_RXQ]); + vring->n_since_last_int += rv; + + if (vring->n_since_last_int > dm->vhost_coalesce_frames) + dpdk_vhost_user_send_interrupt(dm->vlib_main, xd, VIRTIO_RXQ); + } + + int c = rv; + while(c--) + rte_pktmbuf_free (tx_vector[tx_tail+c]); + } + } + else + { + /* + * If we transmitted everything we wanted, then allow 1 retry + * so we can try to transmit the rest. If we didn't transmit + * everything, stop now. + */ + rv = rte_vhost_enqueue_burst(&xd->vu_vhost_dev, VIRTIO_RXQ, + &tx_vector[tx_tail], + (uint16_t) (DPDK_TX_RING_SIZE - tx_tail)); + + if (PREDICT_TRUE(rv > 0)) + { + if (dpdk_vhost_user_want_interrupt(xd, VIRTIO_RXQ)) { + dpdk_vu_vring *vring = &(xd->vu_intf->vrings[VIRTIO_RXQ]); + vring->n_since_last_int += rv; + + if (vring->n_since_last_int > dm->vhost_coalesce_frames) + dpdk_vhost_user_send_interrupt(dm->vlib_main, xd, VIRTIO_RXQ); + } + + int c = rv; + while(c--) + rte_pktmbuf_free (tx_vector[tx_tail+c]); + } + + n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0; + } + } + else if (xd->dev_type == VNET_DPDK_DEV_KNI) + { + if (PREDICT_TRUE(tx_head > tx_tail)) + { + /* no wrap, transmit in one burst */ + rv = rte_kni_tx_burst(xd->kni, + &tx_vector[tx_tail], + (uint16_t) (tx_head-tx_tail)); + } + else + { + /* + * This can only happen if there is a flowcontrol callback. + * We need to split the transmit into two calls: one for + * the packets up to the wrap point, and one to continue + * at the start of the ring. + * Transmit pkts up to the wrap point. + */ + rv = rte_kni_tx_burst(xd->kni, + &tx_vector[tx_tail], + (uint16_t) (DPDK_TX_RING_SIZE - tx_tail)); + + /* + * If we transmitted everything we wanted, then allow 1 retry + * so we can try to transmit the rest. If we didn't transmit + * everything, stop now. + */ + n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0; + } + } + else + { + ASSERT(0); + rv = 0; + } + + if (PREDICT_FALSE(xd->lockp != 0)) + *xd->lockp = 0; + + if (PREDICT_FALSE(rv < 0)) + { + // emit non-fatal message, bump counter + vnet_main_t * vnm = dm->vnet_main; + vnet_interface_main_t * im = &vnm->interface_main; + u32 node_index; + + node_index = vec_elt_at_index(im->hw_interfaces, + xd->vlib_hw_if_index)->tx_node_index; + + vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); + clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, rv); + return n_packets; // untransmitted packets + } + ring->tx_tail += (u16)rv; + n_packets -= (uint16_t) rv; + } while (rv && n_packets && (n_retry>0)); + + return n_packets; +} + + +/* + * This function transmits any packets on the interface's tx_vector and returns + * the number of packets untransmitted on the tx_vector. If the tx_vector is + * empty the function simply returns 0. + * + * It is intended to be called by a traffic manager which has flowed-off an + * interface to see if the interface can be flowed-on again. 
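+ *
+ * Hypothetical usage from such a traffic manager (the poll loop and
+ * tm_flow_on() are illustrative, not defined in this file):
+ *
+ *   u32 remaining = dpdk_interface_tx_vector (vm, dev_instance);
+ *   if (remaining == 0)
+ *     tm_flow_on (dev_instance);   // ring drained, safe to flow on again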
+ */ +u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + int queue_id; + struct rte_mbuf ** tx_vector; + tx_ring_hdr_t *ring; + + /* param is dev_instance and not hw_if_index to save another lookup */ + xd = vec_elt_at_index (dm->devices, dev_instance); + + queue_id = vm->cpu_index; + tx_vector = xd->tx_vectors[queue_id]; + + /* If no packets on the ring, don't bother calling tx function */ + ring = vec_header(tx_vector, sizeof(*ring)); + if (ring->tx_head == ring->tx_tail) + { + return 0; + } + + return tx_burst_vector_internal (vm, xd, tx_vector); +} + +/* + * Transmits the packets on the frame to the interface associated with the + * node. It first copies packets on the frame to a tx_vector containing the + * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal + * which calls the dpdk tx_burst function. + * + * The tx_vector is treated slightly differently depending on whether or + * not a flowcontrol callback function has been configured. If there is no + * callback, the tx_vector is a temporary array of rte_mbuf packet pointers. + * Its entries are written and consumed before the function exits. + * + * If there is a callback then the transmit is being invoked in the presence + * of a traffic manager. Here the tx_vector is treated like a ring of rte_mbuf + * pointers. If not all packets can be transmitted, the untransmitted packets + * stay on the tx_vector until the next call. The callback allows the traffic + * manager to flow-off dequeues to the interface. The companion function + * dpdk_interface_tx_vector() allows the traffic manager to detect when + * it should flow-on the interface again. + */ +static uword +dpdk_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + dpdk_main_t * dm = &dpdk_main; + vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; + dpdk_device_t * xd = vec_elt_at_index (dm->devices, rd->dev_instance); + u32 n_packets = f->n_vectors; + u32 n_left; + u32 * from; + struct rte_mbuf ** tx_vector; + int i; + int queue_id; + u32 my_cpu; + u32 tx_pkts = 0; + tx_ring_hdr_t *ring; + u32 n_on_ring; + + my_cpu = vm->cpu_index; + + queue_id = my_cpu; + + tx_vector = xd->tx_vectors[queue_id]; + ring = vec_header(tx_vector, sizeof(*ring)); + + n_on_ring = ring->tx_head - ring->tx_tail; + from = vlib_frame_vector_args (f); + + ASSERT(n_packets <= VLIB_FRAME_SIZE); + + if (PREDICT_FALSE(n_on_ring + n_packets > DPDK_TX_RING_SIZE)) + { + /* + * Overflowing the ring should never happen. + * If it does then drop the whole frame. 
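+       * The error counter is bumped and every buffer in the frame is freed
+       * via its underlying rte_mbuf below; nothing is queued on the ring.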
+ */ + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, + n_packets); + + while (n_packets--) + { + u32 bi0 = from[n_packets]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + struct rte_mbuf *mb0 = ((struct rte_mbuf *)b0) - 1; + rte_pktmbuf_free (mb0); + } + return n_on_ring; + } + + if (PREDICT_FALSE(dm->tx_pcap_enable)) + { + n_left = n_packets; + while (n_left > 0) + { + u32 bi0 = from[0]; + vlib_buffer_t * b0 = vlib_get_buffer (vm, bi0); + if (dm->pcap_sw_if_index == 0 || + dm->pcap_sw_if_index == vnet_buffer(b0)->sw_if_index [VLIB_TX]) + pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); + from++; + n_left--; + } + } + + from = vlib_frame_vector_args (f); + n_left = n_packets; + i = ring->tx_head % DPDK_TX_RING_SIZE; + + while (n_left >= 4) + { + u32 bi0, bi1; + u32 pi0, pi1; + struct rte_mbuf * mb0, * mb1; + struct rte_mbuf * prefmb0, * prefmb1; + vlib_buffer_t * b0, * b1; + vlib_buffer_t * pref0, * pref1; + i16 delta0, delta1; + u16 new_data_len0, new_data_len1; + u16 new_pkt_len0, new_pkt_len1; + u32 any_clone; + + pi0 = from[2]; + pi1 = from[3]; + pref0 = vlib_get_buffer (vm, pi0); + pref1 = vlib_get_buffer (vm, pi1); + + prefmb0 = ((struct rte_mbuf *)pref0) - 1; + prefmb1 = ((struct rte_mbuf *)pref1) - 1; + + CLIB_PREFETCH(prefmb0, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH(pref0, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH(prefmb1, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH(pref1, CLIB_CACHE_LINE_BYTES, LOAD); + + bi0 = from[0]; + bi1 = from[1]; + from += 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + mb0 = ((struct rte_mbuf *)b0) - 1; + mb1 = ((struct rte_mbuf *)b1) - 1; + + any_clone = b0->clone_count | b1->clone_count; + if (PREDICT_FALSE(any_clone != 0)) + { + if (PREDICT_FALSE(b0->clone_count != 0)) + { + struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0); + if (PREDICT_FALSE(mb0_new == 0)) + { + vlib_error_count (vm, node->node_index, + DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); + b0->flags |= VLIB_BUFFER_REPL_FAIL; + } + else + mb0 = mb0_new; + vec_add1 (dm->recycle[my_cpu], bi0); + } + if (PREDICT_FALSE(b1->clone_count != 0)) + { + struct rte_mbuf * mb1_new = dpdk_replicate_packet_mb (b1); + if (PREDICT_FALSE(mb1_new == 0)) + { + vlib_error_count (vm, node->node_index, + DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); + b1->flags |= VLIB_BUFFER_REPL_FAIL; + } + else + mb1 = mb1_new; + vec_add1 (dm->recycle[my_cpu], bi1); + } + } + + delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : + vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len; + delta1 = PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : + vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len; + + new_data_len0 = (u16)((i16) mb0->data_len + delta0); + new_data_len1 = (u16)((i16) mb1->data_len + delta1); + new_pkt_len0 = (u16)((i16) mb0->pkt_len + delta0); + new_pkt_len1 = (u16)((i16) mb1->pkt_len + delta1); + + b0->current_length = new_data_len0; + b1->current_length = new_data_len1; + mb0->data_len = new_data_len0; + mb1->data_len = new_data_len1; + mb0->pkt_len = new_pkt_len0; + mb1->pkt_len = new_pkt_len1; + + mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ? + mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data); + mb1->data_off = (PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL)) ? 
+ mb1->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b1->current_data); + + if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE)) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + if (b1->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); + } + + if (PREDICT_TRUE(any_clone == 0)) + { + tx_vector[i % DPDK_TX_RING_SIZE] = mb0; + i++; + tx_vector[i % DPDK_TX_RING_SIZE] = mb1; + i++; + } + else + { + /* cloning was done, need to check for failure */ + if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { + tx_vector[i % DPDK_TX_RING_SIZE] = mb0; + i++; + } + if (PREDICT_TRUE((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { + tx_vector[i % DPDK_TX_RING_SIZE] = mb1; + i++; + } + } + + n_left -= 2; + } + while (n_left > 0) + { + u32 bi0; + struct rte_mbuf * mb0; + vlib_buffer_t * b0; + i16 delta0; + u16 new_data_len0; + u16 new_pkt_len0; + + bi0 = from[0]; + from++; + + b0 = vlib_get_buffer (vm, bi0); + + mb0 = ((struct rte_mbuf *)b0) - 1; + if (PREDICT_FALSE(b0->clone_count != 0)) + { + struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0); + if (PREDICT_FALSE(mb0_new == 0)) + { + vlib_error_count (vm, node->node_index, + DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); + b0->flags |= VLIB_BUFFER_REPL_FAIL; + } + else + mb0 = mb0_new; + vec_add1 (dm->recycle[my_cpu], bi0); + } + + delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : + vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len; + + new_data_len0 = (u16)((i16) mb0->data_len + delta0); + new_pkt_len0 = (u16)((i16) mb0->pkt_len + delta0); + + b0->current_length = new_data_len0; + mb0->data_len = new_data_len0; + mb0->pkt_len = new_pkt_len0; + mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ? 
+ mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data); + + if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE)) + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + + if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { + tx_vector[i % DPDK_TX_RING_SIZE] = mb0; + i++; + } + n_left--; + } + + /* account for additional packets in the ring */ + ring->tx_head += n_packets; + n_on_ring = ring->tx_head - ring->tx_tail; + + /* transmit as many packets as possible */ + n_packets = tx_burst_vector_internal (vm, xd, tx_vector); + + /* + * tx_pkts is the number of packets successfully transmitted + * This is the number originally on ring minus the number remaining on ring + */ + tx_pkts = n_on_ring - n_packets; + + if (PREDICT_FALSE(dm->flowcontrol_callback != 0)) + { + if (PREDICT_FALSE(n_packets)) + { + /* Callback may want to enable flowcontrol */ + dm->flowcontrol_callback(vm, xd->vlib_hw_if_index, ring->tx_head - ring->tx_tail); + } + else + { + /* Reset head/tail to avoid unnecessary wrap */ + ring->tx_head = 0; + ring->tx_tail = 0; + } + } + else + { + /* If there is no callback then drop any non-transmitted packets */ + if (PREDICT_FALSE(n_packets)) + { + vlib_simple_counter_main_t * cm; + vnet_main_t * vnm = vnet_get_main(); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, n_packets); + + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + n_packets); + + while (n_packets--) + rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); + } + + /* Reset head/tail to avoid unnecessary wrap */ + ring->tx_head = 0; + ring->tx_tail = 0; + } + + /* Recycle replicated buffers */ + if (PREDICT_FALSE(vec_len(dm->recycle[my_cpu]))) + { + vlib_buffer_free (vm, dm->recycle[my_cpu], vec_len(dm->recycle[my_cpu])); + _vec_len(dm->recycle[my_cpu]) = 0; + } + + ASSERT(ring->tx_head >= ring->tx_tail); + + return tx_pkts; +} + +static int dpdk_device_renumber (vnet_hw_interface_t * hi, + u32 new_dev_instance) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + if (!xd || xd->dev_type != VNET_DPDK_DEV_VHOST_USER) { + clib_warning("cannot renumber non-vhost-user interface (sw_if_index: %d)", + hi->sw_if_index); + return 0; + } + + xd->vu_if_id = new_dev_instance; + return 0; +} + +static u8 * format_dpdk_device_name (u8 * s, va_list * args) +{ + dpdk_main_t * dm = &dpdk_main; + char *devname_format; + char *device_name; + u32 i = va_arg (*args, u32); + struct rte_eth_dev_info dev_info; + + if (dm->interface_name_format_decimal) + devname_format = "%s%d/%d/%d"; + else + devname_format = "%s%x/%x/%x"; + + if (dm->devices[i].dev_type == VNET_DPDK_DEV_KNI) { + return format(s, "kni%d", dm->devices[i].kni_port_id); + } else if (dm->devices[i].dev_type == VNET_DPDK_DEV_VHOST_USER) { + return format(s, "VirtualEthernet0/0/%d", dm->devices[i].vu_if_id); + } + switch (dm->devices[i].port_type) + { + case VNET_DPDK_PORT_TYPE_ETH_1G: + device_name = "GigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_10G: + device_name = "TenGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_40G: + device_name = "FortyGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_SWITCH: + device_name = "EthernetSwitch"; + break; + + #ifdef NETMAP + case VNET_DPDK_PORT_TYPE_NETMAP: + rte_eth_dev_info_get(i, &dev_info); + return format(s, "netmap:%s", 
dev_info.driver_name); + #endif + + case VNET_DPDK_PORT_TYPE_AF_PACKET: + rte_eth_dev_info_get(i, &dev_info); + return format(s, "af_packet%d", dm->devices[i].af_packet_port_id); + + default: + case VNET_DPDK_PORT_TYPE_UNKNOWN: + device_name = "UnknownEthernet"; + break; + } + + rte_eth_dev_info_get(i, &dev_info); + return format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, + dev_info.pci_dev->addr.devid, + dev_info.pci_dev->addr.function); +} + +static u8 * format_dpdk_device_type (u8 * s, va_list * args) +{ + dpdk_main_t * dm = &dpdk_main; + char *dev_type; + u32 i = va_arg (*args, u32); + + if (dm->devices[i].dev_type == VNET_DPDK_DEV_KNI) { + return format(s, "Kernel NIC Interface"); + } else if (dm->devices[i].dev_type == VNET_DPDK_DEV_VHOST_USER) { + return format(s, "vhost-user interface"); + } + + switch (dm->devices[i].pmd) + { + case VNET_DPDK_PMD_E1000EM: + dev_type = "Intel 82540EM (e1000)"; + break; + + case VNET_DPDK_PMD_IGB: + dev_type = "Intel e1000"; + break; + + case VNET_DPDK_PMD_I40E: + dev_type = "Intel X710/XL710 Family"; + break; + + case VNET_DPDK_PMD_I40EVF: + dev_type = "Intel X710/XL710 Family VF"; + break; + + case VNET_DPDK_PMD_FM10K: + dev_type = "Intel FM10000 Family Ethernet Switch"; + break; + + case VNET_DPDK_PMD_IGBVF: + dev_type = "Intel e1000 VF"; + break; + + case VNET_DPDK_PMD_VIRTIO: + dev_type = "Red Hat Virtio"; + break; + + case VNET_DPDK_PMD_IXGBEVF: + dev_type = "Intel 82599 VF"; + break; + + case VNET_DPDK_PMD_IXGBE: + dev_type = "Intel 82599"; + break; + + case VNET_DPDK_PMD_VICE: + case VNET_DPDK_PMD_ENIC: + dev_type = "Cisco VIC"; + break; + + case VNET_DPDK_PMD_VMXNET3: + dev_type = "VMware VMXNET3"; + break; + +#ifdef NETMAP + case VNET_DPDK_PMD_NETMAP: + dev_type = "Netmap/Vale"; + break; +#endif + + case VNET_DPDK_PMD_AF_PACKET: + dev_type = "af_packet"; + break; + + default: + case VNET_DPDK_PMD_UNKNOWN: + dev_type = "### UNKNOWN ###"; + break; + } + + return format (s, dev_type); +} + +static u8 * format_dpdk_link_status (u8 * s, va_list * args) +{ + dpdk_device_t * xd = va_arg (*args, dpdk_device_t *); + struct rte_eth_link * l = &xd->link; + vnet_main_t * vnm = vnet_get_main(); + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); + + s = format (s, "%s ", l->link_status ? "up" : "down"); + if (l->link_status) + { + u32 promisc = rte_eth_promiscuous_get (xd->device_index); + + s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? + "full" : "half"); + s = format (s, "speed %u mtu %d %s\n", l->link_speed, + hi->max_packet_bytes, promisc ? 
" promisc" : ""); + } + else + s = format (s, "\n"); + + return s; +} + +#define _line_len 72 +#define _(v, str) \ +if (bitmap & v) { \ + if (format_get_indent (s) > next_split ) { \ + next_split += _line_len; \ + s = format(s,"\n%U", format_white_space, indent); \ + } \ + s = format(s, "%s ", str); \ +} + +static u8 * format_dpdk_rss_hf_name(u8 * s, va_list * args) +{ + u64 bitmap = va_arg (*args, u64); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format(s, "none"); + + foreach_dpdk_rss_hf + + return s; +} + +static u8 * format_dpdk_rx_offload_caps(u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format(s, "none"); + + foreach_dpdk_rx_offload_caps + + return s; +} + +static u8 * format_dpdk_tx_offload_caps(u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + if (!bitmap) + return format(s, "none"); + + foreach_dpdk_tx_offload_caps + + return s; +} + +#undef _line_len +#undef _ + +static u8 * format_dpdk_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = vec_elt_at_index (dm->devices, dev_instance); + uword indent = format_get_indent (s); + f64 now = vlib_time_now (dm->vlib_main); + + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + + s = format (s, "%U\n%Ucarrier %U", + format_dpdk_device_type, xd->device_index, + format_white_space, indent + 2, + format_dpdk_link_status, xd); + + if (verbose > 1 && xd->dev_type == VNET_DPDK_DEV_ETH) + { + struct rte_eth_dev_info di; + struct rte_pci_device * pci; + struct rte_eth_rss_conf rss_conf; + int vlan_off; + + rss_conf.rss_key = 0; + rte_eth_dev_info_get(xd->device_index, &di); + rte_eth_dev_rss_hash_conf_get(xd->device_index, &rss_conf); + pci = di.pci_dev; + + s = format(s, "%Upci id: device %04x:%04x subsystem %04x:%04x\n" + "%Upci address: %04x:%02x:%02x.%02x\n", + format_white_space, indent + 2, + pci->id.vendor_id, pci->id.device_id, + pci->id.subsystem_vendor_id, + pci->id.subsystem_device_id, + format_white_space, indent + 2, + pci->addr.domain, pci->addr.bus, + pci->addr.devid, pci->addr.function); + s = format(s, "%Umax rx packet len: %d\n", + format_white_space, indent + 2, di.max_rx_pktlen); + s = format(s, "%Upromiscuous: unicast %s all-multicast %s\n", + format_white_space, indent + 2, + rte_eth_promiscuous_get(xd->device_index) ? "on" : "off", + rte_eth_promiscuous_get(xd->device_index) ? "on" : "off"); + vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index); + s = format(s, "%Uvlan offload: strip %s filter %s qinq %s\n", + format_white_space, indent + 2, + vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? 
"on" : "off"); + s = format(s, "%Uqueue size (max): rx %d (%d) tx %d (%d)\n", + format_white_space, indent + 2, + xd->rx_q_used, di.max_rx_queues, + xd->tx_q_used, di.max_tx_queues); + s = format(s, "%Urx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_rx_offload_caps, di.rx_offload_capa); + s = format(s, "%Utx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_tx_offload_caps, di.tx_offload_capa); + s = format(s, "%Urss active: %U\n" + "%Urss supported: %U\n", + format_white_space, indent + 2, + format_dpdk_rss_hf_name, rss_conf.rss_hf, + format_white_space, indent + 2, + format_dpdk_rss_hf_name, di.flow_type_rss_offloads); + } + + if (xd->cpu_socket > -1) + s = format (s, "%Ucpu socket %d", + format_white_space, indent + 2, + xd->cpu_socket); + + /* $$$ MIB counters */ + + { +#define _(N, V) \ + if (xd->stats.V != 0) \ + s = format (s, "\n%U%-40U%16Ld", \ + format_white_space, indent + 2, \ + format_c_identifier, #N, xd->stats.V); + + foreach_dpdk_counter +#undef _ + } + + u8 * xs = 0; + struct rte_eth_xstats * xstat; + + vec_foreach(xstat, xd->xstats) + { + if (xstat->value) + { + /* format_c_identifier don't like c strings inside vector */ + u8 * name = format(0,"%s", xstat->name); + xs = format(xs, "\n%U%-38U%16Ld", + format_white_space, indent + 4, + format_c_identifier, name, xstat->value); + vec_free(name); + } + } + + if (xs) + { + s = format(s, "\n%Uextended stats:%v", + format_white_space, indent + 2, xs); + vec_free(xs); + } + + return s; +} + +static u8 * format_dpdk_tx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main(); + dpdk_tx_dma_trace_t * t = va_arg (*va, dpdk_tx_dma_trace_t *); + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = vec_elt_at_index (dm->devices, t->device_index); + uword indent = format_get_indent (s); + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U tx queue %d", + format_vnet_sw_interface_name, vnm, sw, + t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, + format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", format_white_space, indent, + format_ethernet_header_with_length, t->buffer.pre_data, + sizeof (t->buffer.pre_data)); + + return s; +} + +static void dpdk_clear_hw_interface_counters (u32 instance) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = vec_elt_at_index (dm->devices, instance); + + /* + * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * therefore fake the stop in the dpdk driver by + * silently dropping all of the incoming pkts instead of + * stopping the driver / hardware. 
+ */ + if (xd->admin_up != 0xff) + { + rte_eth_stats_reset (xd->device_index); + memset (&xd->last_stats, 0, sizeof (xd->last_stats)); + dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); + } + else + { + rte_eth_stats_reset (xd->device_index); + memset(&xd->stats, 0, sizeof(xd->stats)); + memset (&xd->last_stats, 0, sizeof (xd->last_stats)); + } + rte_eth_xstats_reset(xd->device_index); +} + +static int +kni_config_network_if(u8 port_id, u8 if_up) +{ + vnet_main_t * vnm = vnet_get_main(); + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + uword *p; + + p = hash_get (dm->dpdk_device_by_kni_port_id, port_id); + if (p == 0) { + clib_warning("unknown interface"); + return 0; + } else { + xd = vec_elt_at_index (dm->devices, p[0]); + } + + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, + if_up ? VNET_HW_INTERFACE_FLAG_LINK_UP | + ETH_LINK_FULL_DUPLEX : 0); + return 0; +} + +static int +kni_change_mtu(u8 port_id, unsigned new_mtu) +{ + vnet_main_t * vnm = vnet_get_main(); + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + uword *p; + vnet_hw_interface_t * hif; + + p = hash_get (dm->dpdk_device_by_kni_port_id, port_id); + if (p == 0) { + clib_warning("unknown interface"); + return 0; + } else { + xd = vec_elt_at_index (dm->devices, p[0]); + } + hif = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); + + hif->max_packet_bytes = new_mtu; + + return 0; +} + +static clib_error_t * +dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t * hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = vec_elt_at_index (dm->devices, hif->dev_instance); + int rv = 0; + + if (xd->dev_type == VNET_DPDK_DEV_KNI) + { + if (is_up) + { + struct rte_kni_conf conf; + struct rte_kni_ops ops; + vlib_main_t * vm = vlib_get_main(); + vlib_buffer_main_t * bm = vm->buffer_main; + memset(&conf, 0, sizeof(conf)); + snprintf(conf.name, RTE_KNI_NAMESIZE, "vpp%u", xd->kni_port_id); + conf.mbuf_size = MBUF_SIZE; + memset(&ops, 0, sizeof(ops)); + ops.port_id = xd->kni_port_id; + ops.change_mtu = kni_change_mtu; + ops.config_network_if = kni_config_network_if; + + xd->kni = rte_kni_alloc(bm->pktmbuf_pools[rte_socket_id()], &conf, &ops); + if (!xd->kni) + { + clib_warning("failed to allocate kni interface"); + } + else + { + hif->max_packet_bytes = 1500; /* kni interface default value */ + xd->admin_up = 1; + } + } + else + { + xd->admin_up = 0; + rte_kni_release(xd->kni); + } + return 0; + } + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + { + if (is_up) + { + if (xd->vu_is_running) + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP | + ETH_LINK_FULL_DUPLEX ); + xd->admin_up = 1; + } + else + { + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + xd->admin_up = 0; + } + + return 0; + } + + + if (is_up) + { + f64 now = vlib_time_now (dm->vlib_main); + + /* + * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * therefore fake the stop in the dpdk driver by + * silently dropping all of the incoming pkts instead of + * stopping the driver / hardware. 
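+       * (A VMXNET3 port that was faked down still has admin_up == 0xff, so
+       * rte_eth_dev_start() is intentionally skipped for it below.)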
+ */ + if (xd->admin_up == 0) + rv = rte_eth_dev_start (xd->device_index); + + if (xd->promisc) + rte_eth_promiscuous_enable(xd->device_index); + else + rte_eth_promiscuous_disable(xd->device_index); + + rte_eth_allmulticast_enable (xd->device_index); + xd->admin_up = 1; + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + } + else + { + rte_eth_allmulticast_disable (xd->device_index); + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + + /* + * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * therefore fake the stop in the dpdk driver by + * silently dropping all of the incoming pkts instead of + * stopping the driver / hardware. + */ + if (xd->pmd != VNET_DPDK_PMD_VMXNET3) + { + rte_eth_dev_stop (xd->device_index); + xd->admin_up = 0; + } + else + xd->admin_up = ~0; + } + + if (rv < 0) + clib_warning ("rte_eth_dev_%s error: %d", is_up ? "start" : "stop", + rv); + + return /* no error */ 0; +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void dpdk_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index, + u32 node_index) +{ + dpdk_main_t * xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + xd->per_interface_next_index = node_index; + return; + } + + xd->per_interface_next_index = + vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); +} + + +static clib_error_t * +dpdk_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t * st, + int is_add) +{ + dpdk_main_t * xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance); + vnet_sw_interface_t * t = (vnet_sw_interface_t *) st; + int r, vlan_offload; + + + if (xd->dev_type != VNET_DPDK_DEV_ETH) + return 0; + /* currently we program VLANS only for IXGBE VF */ + if (xd->pmd != VNET_DPDK_PMD_IXGBEVF) + return 0; + + if (t->sub.eth.flags.no_tags == 1) + return 0; + + if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1 )) + return clib_error_return (0, "unsupported VLAN setup"); + + + vlan_offload = rte_eth_dev_get_vlan_offload(xd->device_index); + vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; + + if ((r = rte_eth_dev_set_vlan_offload(xd->device_index, vlan_offload))) + return clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", + xd->device_index, r); + + + if ((r = rte_eth_dev_vlan_filter(xd->device_index, t->sub.eth.outer_vlan_id, is_add))) + return clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", + xd->device_index, r); + + return 0; +} + +VNET_DEVICE_CLASS (dpdk_device_class) = { + .name = "dpdk", + .tx_function = dpdk_interface_tx, + .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR, + .tx_function_error_strings = dpdk_tx_func_error_strings, + .format_device_name = format_dpdk_device_name, + .format_device = format_dpdk_device, + .format_tx_trace = format_dpdk_tx_dma_trace, + .clear_counters = dpdk_clear_hw_interface_counters, + .admin_up_down_function = dpdk_interface_admin_up_down, + .subif_add_del_function = dpdk_subif_add_del_function, + .rx_redirect_to_node = dpdk_set_interface_next_node, + .no_flatten_output_chains = 1, + .name_renumber = dpdk_device_renumber, +}; + +void dpdk_set_flowcontrol_callback (vlib_main_t *vm, + dpdk_flowcontrol_callback_t callback) +{ + 
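dpdk_subif_add_del_function above programs a hardware VLAN filter for IXGBE VF sub-interfaces by turning on the VLAN-filter offload and then adding the outer VLAN id to the port's filter table. A condensed sketch of that call sequence, using the same DPDK APIs, is shown below; port_id and vlan_id are illustrative values.

#include <rte_ethdev.h>

static int
enable_vlan_filter_example (uint8_t port_id, uint16_t vlan_id)
{
  int r, offload;

  /* Read the current offload mask and set the VLAN-filter bit. */
  offload = rte_eth_dev_get_vlan_offload (port_id);
  offload |= ETH_VLAN_FILTER_OFFLOAD;

  r = rte_eth_dev_set_vlan_offload (port_id, offload);
  if (r)
    return r;

  /* 1 = add the VLAN id to the hardware filter table, 0 = remove it. */
  return rte_eth_dev_vlan_filter (port_id, vlan_id, 1 /* on */);
}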
dpdk_main.flowcontrol_callback = callback; +} + +#define UP_DOWN_FLAG_EVENT 1 + + +u32 dpdk_get_admin_up_down_in_progress (void) +{ + return dpdk_main.admin_up_down_in_progress; +} + +static uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + clib_error_t * error = 0; + uword event_type; + uword *event_data = 0; + u32 index; + u32 sw_if_index; + u32 flags; + + while (1) + { + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + + dpdk_main.admin_up_down_in_progress = 1; + + for (index=0; index<vec_len(event_data); index++) + { + sw_if_index = event_data[index] >> 32; + flags = (u32) event_data[index]; + + switch (event_type) { + case UP_DOWN_FLAG_EVENT: + error = vnet_sw_interface_set_flags (vnet_get_main(), sw_if_index, flags); + clib_error_report(error); + break; + } + } + + vec_reset_length (event_data); + + dpdk_main.admin_up_down_in_progress = 0; + + } + return 0; /* or not */ +} + +VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { + .function = admin_up_down_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "admin-up-down-process", + .process_log2_n_stack_bytes = 17, // 256KB +}; + +/* + * Asynchronously invoke vnet_sw_interface_set_flags via the admin_up_down + * process. Useful for avoiding long blocking delays (>150ms) in the dpdk + * drivers. + * WARNING: when posting this event, no other interface-related calls should + * be made (e.g. vnet_create_sw_interface()) while the event is being + * processed (admin_up_down_in_progress). This is required in order to avoid + * race conditions in manipulating interface data structures. + */ +void post_sw_interface_set_flags (vlib_main_t *vm, u32 sw_if_index, u32 flags) +{ + vlib_process_signal_event + (vm, admin_up_down_process_node.index, + UP_DOWN_FLAG_EVENT, + (((uword)sw_if_index << 32) | flags)); +} + +/* + * Called by the dpdk driver's rte_delay_us() function. + * Return 0 to have the dpdk do a regular delay loop. + * Return 1 if to skip the delay loop because we are suspending + * the calling vlib process instead. + */ +int rte_delay_us_override (unsigned us) { + vlib_main_t * vm; + + /* Don't bother intercepting for short delays */ + if (us < 10) return 0; + + /* + * Only intercept if we are in a vlib process. + * If we are called from a vlib worker thread or the vlib main + * thread then do not intercept. (Must not be called from an + * independent pthread). + */ + if (os_get_cpu_number() == 0) + { + /* + * We're in the vlib main thread or a vlib process. Make sure + * the process is running and we're not still initializing. + */ + vm = vlib_get_main(); + if (vlib_in_process_context(vm)) + { + /* Only suspend for the admin_down_process */ + vlib_process_t * proc = vlib_get_current_process(vm); + if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || + (proc->node_runtime.function != admin_up_down_process)) + return 0; + + f64 delay = 1e-6 * us; + vlib_process_suspend(vm, delay); + return 1; + } + } + return 0; // no override +} diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h new file mode 100644 index 00000000000..fd984e4d4df --- /dev/null +++ b/vnet/vnet/devices/dpdk/dpdk.h @@ -0,0 +1,515 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
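post_sw_interface_set_flags and admin_up_down_process above pass a single 64-bit event datum: the sw_if_index occupies the upper 32 bits and the interface flags the lower 32. The following standalone sketch shows the packing and unpacking; the helper names are illustrative only.

#include <stdint.h>
#include <assert.h>

static inline uint64_t
pack_up_down_event (uint32_t sw_if_index, uint32_t flags)
{
  return ((uint64_t) sw_if_index << 32) | flags;
}

static inline void
unpack_up_down_event (uint64_t e, uint32_t * sw_if_index, uint32_t * flags)
{
  *sw_if_index = e >> 32;
  *flags = (uint32_t) e;
}

int main (void)
{
  uint32_t ifi, fl;
  unpack_up_down_event (pack_up_down_event (5, 1), &ifi, &fl);
  assert (ifi == 5 && fl == 1);
  return 0;
}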
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_dpdk_h__ +#define __included_dpdk_h__ + +/* $$$$ We should rename always_inline -> clib_always_inline */ +#undef always_inline + +#include <rte_config.h> + +#include <rte_common.h> +#include <rte_dev.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_tailq.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_kni.h> +#include <rte_virtio_net.h> +#include <rte_pci_dev_ids.h> +#include <rte_version.h> + +#include <vnet/unix/pcap.h> +#include <vnet/devices/virtio/vhost-user.h> + +#if CLIB_DEBUG > 0 +#define always_inline static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif + +#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) +#define NB_MBUF (32<<10) + +vnet_device_class_t dpdk_device_class; +vlib_node_registration_t dpdk_input_node; +vlib_node_registration_t dpdk_io_input_node; +vlib_node_registration_t handoff_dispatch_node; + +typedef enum { + VNET_DPDK_DEV_ETH = 1, /* Standard DPDK PMD driver */ + VNET_DPDK_DEV_KNI, /* Kernel NIC Interface */ + VNET_DPDK_DEV_VHOST_USER, + VNET_DPDK_DEV_UNKNOWN, /* must be last */ +} dpdk_device_type_t; + +#define foreach_dpdk_pmd \ + _ ("rte_em_pmd", E1000EM) \ + _ ("rte_igb_pmd", IGB) \ + _ ("rte_igbvf_pmd", IGBVF) \ + _ ("rte_ixgbe_pmd", IXGBE) \ + _ ("rte_ixgbevf_pmd", IXGBEVF) \ + _ ("rte_i40e_pmd", I40E) \ + _ ("rte_i40evf_pmd", I40EVF) \ + _ ("rte_virtio_pmd", VIRTIO) \ + _ ("rte_vice_pmd", VICE) \ + _ ("rte_enic_pmd", ENIC) \ + _ ("rte_vmxnet3_pmd", VMXNET3) \ + _ ("AF_PACKET PMD", AF_PACKET) \ + _ ("rte_pmd_fm10k", FM10K) + +typedef enum { + VNET_DPDK_PMD_NONE, +#define _(s,f) VNET_DPDK_PMD_##f, + foreach_dpdk_pmd +#undef _ +#ifdef NETMAP + VNET_DPDK_PMD_NETMAP, +#endif + VNET_DPDK_PMD_UNKNOWN, /* must be last */ +} dpdk_pmd_t; + +typedef enum { + VNET_DPDK_PORT_TYPE_ETH_1G, + VNET_DPDK_PORT_TYPE_ETH_10G, + VNET_DPDK_PORT_TYPE_ETH_40G, + VNET_DPDK_PORT_TYPE_ETH_SWITCH, +#ifdef NETMAP + VNET_DPDK_PORT_TYPE_NETMAP, +#endif + VNET_DPDK_PORT_TYPE_AF_PACKET, + VNET_DPDK_PORT_TYPE_UNKNOWN, +} dpdk_port_type_t; + +typedef struct { + f64 deadline; + vlib_frame_t * frame; +} dpdk_frame_t; + +#define DPDK_EFD_MAX_DISCARD_RATE 10 + +typedef struct { + u16 last_burst_sz; + u16 max_burst_sz; + u32 full_frames_cnt; + u32 consec_full_frames_cnt; + u32 congestion_cnt; + u64 last_poll_time; + u64 max_poll_delay; + u32 discard_cnt; + u32 total_packet_cnt; +} dpdk_efd_agent_t; + +typedef struct { + int callfd; + int kickfd; + int errfd; + u32 callfd_idx; + u32 n_since_last_int; + f64 int_deadline; +} dpdk_vu_vring; + +typedef struct { + u32 is_up; + u32 unix_fd; + u32 
unix_file_index; + u32 client_fd; + char sock_filename[256]; + int sock_errno; + u8 sock_is_server; + u8 active; + + u64 feature_mask; + u32 num_vrings; + dpdk_vu_vring vrings[2]; + u64 region_addr[VHOST_MEMORY_MAX_NREGIONS]; + u32 region_fd[VHOST_MEMORY_MAX_NREGIONS]; +} dpdk_vu_intf_t; + +typedef void (*dpdk_flowcontrol_callback_t) (vlib_main_t *vm, + u32 hw_if_index, + u32 n_packets); + +/* + * The header for the tx_vector in dpdk_device_t. + * Head and tail are indexes into the tx_vector and are of type + * u64 so they never overflow. + */ +typedef struct { + u64 tx_head; + u64 tx_tail; +} tx_ring_hdr_t; + +typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); + volatile u32 *lockp; + + /* Instance ID */ + u32 device_index; + + u32 vlib_hw_if_index; + u32 vlib_sw_if_index; + + /* next node index if we decide to steal the rx graph arc */ + u32 per_interface_next_index; + + /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ + struct rte_mbuf *** tx_vectors; /* one per worker thread */ + struct rte_mbuf *** rx_vectors; + + /* vector of traced contexts, per device */ + u32 * d_trace_buffers; + + /* per-worker destination frame queue */ + dpdk_frame_t * frames; + + dpdk_device_type_t dev_type:8; + dpdk_pmd_t pmd:8; + i8 cpu_socket; + + u8 admin_up; + u8 promisc; + + CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); + + /* PMD related */ + u16 tx_q_used; + u16 rx_q_used; + u16 nb_rx_desc; + u16 nb_tx_desc; + u16 * cpu_socket_id_by_queue; + struct rte_eth_conf port_conf; + struct rte_eth_txconf tx_conf; + + /* KNI related */ + struct rte_kni *kni; + u8 kni_port_id; + + /* vhost-user related */ + u32 vu_if_id; + struct virtio_net vu_vhost_dev; + u32 vu_is_running; + dpdk_vu_intf_t *vu_intf; + + /* af_packet */ + u8 af_packet_port_id; + + struct rte_eth_link link; + f64 time_last_link_update; + + struct rte_eth_stats stats; + struct rte_eth_stats last_stats; + struct rte_eth_xstats * xstats; + f64 time_last_stats_update; + dpdk_port_type_t port_type; + + dpdk_efd_agent_t efd_agent; +} dpdk_device_t; + +#define MAX_NELTS 32 +typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); + u64 head; + u64 head_hint; + u64 tail; + u32 n_in_use; + u32 nelts; + u32 written; + u32 threshold; + i32 n_vectors[MAX_NELTS]; +} frame_queue_trace_t; + +#define DPDK_TX_RING_SIZE (4 * 1024) + +#define DPDK_STATS_POLL_INTERVAL 10.0 +#define DPDK_LINK_POLL_INTERVAL 3.0 + +typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); + + /* total input packet counter */ + u64 aggregate_rx_packets; +} dpdk_worker_t; + +typedef struct { + u32 device; + u16 queue_id; +} dpdk_device_and_queue_t; + +/* Early-Fast-Discard (EFD) */ +#define DPDK_EFD_DISABLED 0 +#define DPDK_EFD_DISCARD_ENABLED (1 << 0) +#define DPDK_EFD_MONITOR_ENABLED (1 << 1) +#define DPDK_EFD_DROPALL_ENABLED (1 << 2) + +#define DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT 90 +#define DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH 6 + +typedef struct dpdk_efd_t { + u16 enabled; + u16 queue_hi_thresh; + u16 consec_full_frames_hi_thresh; + u16 pad; +} dpdk_efd_t; + +typedef struct { + + /* Devices */ + dpdk_device_t * devices; + dpdk_device_and_queue_t ** devices_by_cpu; + + /* per-thread recycle lists */ + u32 ** recycle; + + /* flow control callback. If 0 then flow control is disabled */ + dpdk_flowcontrol_callback_t flowcontrol_callback; + + /* vlib buffer free list, must be same size as an rte_mbuf */ + u32 vlib_buffer_free_list_index; + + /* + * format interface names ala xxxEthernet%d/%d/%d instead of + * xxxEthernet%x/%x/%x. For VIRL. 
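The tx_ring_hdr_t comment above describes head and tail as u64 indexes that only ever increase, so they never wrap in practice. A common way to use such counters with a power-of-two ring like DPDK_TX_RING_SIZE is sketched below: occupancy is simply head minus tail, and the physical slot is the counter masked by the ring size. This is a generic illustration of the scheme the comment describes, not the actual VPP tx path; RING_SIZE and the helper names are assumptions.

#include <stdint.h>

#define RING_SIZE (4 * 1024)   /* must stay a power of two for the mask */

typedef struct {
  uint64_t head;   /* next slot to produce into */
  uint64_t tail;   /* next slot to consume from */
  void * slots[RING_SIZE];
} example_ring_t;

static inline uint64_t ring_n_used (example_ring_t * r) { return r->head - r->tail; }
static inline uint64_t ring_n_free (example_ring_t * r) { return RING_SIZE - ring_n_used (r); }

static inline int
ring_put (example_ring_t * r, void * e)
{
  if (ring_n_free (r) == 0)
    return -1;                               /* ring full */
  r->slots[r->head & (RING_SIZE - 1)] = e;   /* 64-bit counters never wrap in practice */
  r->head++;
  return 0;
}

static inline void *
ring_get (example_ring_t * r)
{
  if (ring_n_used (r) == 0)
    return 0;                                /* ring empty */
  return r->slots[r->tail++ & (RING_SIZE - 1)];
}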
+ */ + u8 interface_name_format_decimal; + + + /* dpdk worker "threads" */ + dpdk_worker_t * workers; + + /* Config stuff */ + u8 ** eal_init_args; + u8 * eth_if_blacklist; + u8 * eth_if_whitelist; + u8 * uio_driver_name; + u8 no_multi_seg; + + /* Required config parameters */ + u8 coremask_set_manually; + u8 nchannels_set_manually; + u32 coremask; + u32 nchannels; + u32 num_mbufs; + u32 use_rss; + u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ + + /* Ethernet input node index */ + u32 ethernet_input_node_index; + + /* dpdk i/o thread initialization barrier */ + volatile u32 io_thread_release; + + /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ + int tx_pcap_enable; + pcap_main_t pcap_main; + u8 * pcap_filename; + u32 pcap_sw_if_index; + u32 pcap_pkts_to_capture; + + /* virtio vhost-user switch */ + u8 use_virtio_vhost; + + /* vhost-user coalescence frames config */ + u32 vhost_coalesce_frames; + f64 vhost_coalesce_time; + + /* hashes */ + uword * dpdk_device_by_kni_port_id; + uword * vu_sw_if_index_by_listener_fd; + uword * vu_sw_if_index_by_sock_fd; + u32 * vu_inactive_interfaces_device_index; + + u32 next_vu_if_id; + + /* efd (early-fast-discard) settings */ + dpdk_efd_t efd; + + /* + * flag indicating that a posted admin up/down + * (via post_sw_interface_set_flags) is in progress + */ + u8 admin_up_down_in_progress; + + u8 have_io_threads; + + /* which cpus are running dpdk-input */ + int input_cpu_first_index; + int input_cpu_count; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} dpdk_main_t; + +dpdk_main_t dpdk_main; + +typedef enum { + DPDK_RX_NEXT_IP4_INPUT, + DPDK_RX_NEXT_IP6_INPUT, + DPDK_RX_NEXT_MPLS_INPUT, + DPDK_RX_NEXT_ETHERNET_INPUT, + DPDK_RX_NEXT_DROP, + DPDK_RX_N_NEXT, +} dpdk_rx_next_t; + +void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); + +void dpdk_set_next_node (dpdk_rx_next_t, char *); + +typedef void (*dpdk_io_thread_callback_t) (vlib_main_t *vm); + +void dpdk_io_thread (vlib_worker_thread_t * w, + u32 instances, + u32 instance_id, + char *worker_name, + dpdk_io_thread_callback_t callback); +void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); + +clib_error_t * dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); + +void dpdk_set_flowcontrol_callback (vlib_main_t *vm, + dpdk_flowcontrol_callback_t callback); + +u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); + +vlib_frame_queue_elt_t * vlib_get_handoff_queue_elt (u32 vlib_worker_index); + +u32 dpdk_get_handoff_node_index (void); + +void set_efd_bitmap (u8 *bitmap, u32 value, u32 op); + +#define foreach_dpdk_error \ + _(NONE, "no error") \ + _(RX_PACKET_ERROR, "Rx packet errors") \ + _(RX_BAD_FCS, "Rx bad fcs") \ + _(L4_CHECKSUM_ERROR, "Rx L4 checksum errors") \ + _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ + _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ + _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ + _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") \ + _(IPV4_EFD_DROP_PKTS, "IPV4 Early Fast Discard rx drops") \ + _(IPV6_EFD_DROP_PKTS, "IPV6 Early Fast Discard rx drops") \ + _(MPLS_EFD_DROP_PKTS, "MPLS Early Fast Discard rx drops") \ + _(VLAN_EFD_DROP_PKTS, "VLAN Early Fast Discard rx drops") + +typedef enum { +#define _(f,s) DPDK_ERROR_##f, + foreach_dpdk_error +#undef _ + DPDK_N_ERROR, +} dpdk_error_t; + +/* + * Increment EFD drop counter + */ +static_always_inline +void increment_efd_drop_counter (vlib_main_t * vm, u32 counter_index, u32 count) +{ + vlib_node_t *my_n; + 
+ my_n = vlib_get_node (vm, dpdk_input_node.index); + vm->error_main.counters[my_n->error_heap_index+counter_index] += count; +} + +void dpdk_update_link_state (dpdk_device_t * xd, f64 now); +void dpdk_efd_update_counters(dpdk_device_t *xd, u32 n_buffers, u16 enabled); +u32 is_efd_discardable(vlib_thread_main_t *tm, + vlib_buffer_t * b0, + struct rte_mbuf *mb); + +/* dpdk vhost-user interrupt management */ +u8 dpdk_vhost_user_want_interrupt (dpdk_device_t *xd, int idx); +void dpdk_vhost_user_send_interrupt (vlib_main_t * vm, dpdk_device_t * xd, + int idx); + + +static inline u64 vnet_get_aggregate_rx_packets (void) +{ + dpdk_main_t * dm = &dpdk_main; + u64 sum = 0; + dpdk_worker_t * dw; + + vec_foreach(dw, dm->workers) + sum += dw->aggregate_rx_packets; + + return sum; +} + +void dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, + u32 * buffers, + uword n_buffers); + +#define EFD_OPERATION_LESS_THAN 0 +#define EFD_OPERATION_GREATER_OR_EQUAL 1 + +void efd_config(u32 enabled, + u32 ip_prec, u32 ip_op, + u32 mpls_exp, u32 mpls_op, + u32 vlan_cos, u32 vlan_op); + +void post_sw_interface_set_flags (vlib_main_t *vm, u32 sw_if_index, u32 flags); + +typedef struct vhost_user_memory vhost_user_memory_t; + +void dpdk_vhost_user_process_init (void **ctx); +void dpdk_vhost_user_process_cleanup (void *ctx); +uword dpdk_vhost_user_process_if (vlib_main_t *vm, dpdk_device_t *xd, void *ctx); + +// vhost-user calls +int dpdk_vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, + const char * sock_filename, + u8 is_server, + u32 * sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance); +int dpdk_vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, + const char * sock_filename, + u8 is_server, + u32 sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance); +int dpdk_vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, + u32 sw_if_index); +int dpdk_vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, + vhost_user_intf_details_t **out_vuids); + +u32 dpdk_get_admin_up_down_in_progress (void); + +uword +dpdk_input_rss (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f); + +#endif /* __included_dpdk_h__ */ diff --git a/vnet/vnet/devices/dpdk/dpdk_priv.h b/vnet/vnet/devices/dpdk/dpdk_priv.h new file mode 100644 index 00000000000..e452e02d90d --- /dev/null +++ b/vnet/vnet/devices/dpdk/dpdk_priv.h @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define DPDK_NB_RX_DESC_DEFAULT 512 +#define DPDK_NB_TX_DESC_DEFAULT 512 +#define DPDK_NB_RX_DESC_VIRTIO 256 +#define DPDK_NB_TX_DESC_VIRTIO 256 +#define DPDK_NB_RX_DESC_10GE 2048 +#define DPDK_NB_TX_DESC_10GE 2048 +#define DPDK_NB_RX_DESC_40GE (4096-128) +#define DPDK_NB_TX_DESC_40GE 2048 + +#define foreach_dpdk_counter \ + _ (tx_frames_ok, opackets) \ + _ (tx_bytes_ok, obytes) \ + _ (tx_errors, oerrors) \ + _ (tx_loopback_frames_ok, olbpackets) \ + _ (tx_loopback_bytes_ok, olbbytes) \ + _ (rx_frames_ok, ipackets) \ + _ (rx_bytes_ok, ibytes) \ + _ (rx_errors, ierrors) \ + _ (rx_missed, imissed) \ + _ (rx_bad_crc, ibadcrc) \ + _ (rx_bad_length, ibadlen) \ + _ (rx_multicast_frames_ok, imcasts) \ + _ (rx_no_bufs, rx_nombuf) \ + _ (rx_filter_match, fdirmatch) \ + _ (rx_filter_miss, fdirmiss) \ + _ (tx_pause_xon, tx_pause_xon) \ + _ (rx_pause_xon, rx_pause_xon) \ + _ (tx_pause_xoff, tx_pause_xoff) \ + _ (rx_pause_xoff, rx_pause_xoff) \ + _ (rx_loopback_frames_ok, ilbpackets) \ + _ (rx_loopback_bytes_ok, ilbbytes) + +#define foreach_dpdk_q_counter \ + _ (rx_frames_ok, q_ipackets) \ + _ (tx_frames_ok, q_opackets) \ + _ (rx_bytes_ok, q_ibytes) \ + _ (tx_bytes_ok, q_obytes) \ + _ (rx_errors, q_errors) + +#define foreach_dpdk_rss_hf \ + _(ETH_RSS_IPV4, "ipv4") \ + _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \ + _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \ + _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \ + _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \ + _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \ + _(ETH_RSS_IPV6, "ipv6") \ + _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \ + _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \ + _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \ + _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \ + _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \ + _(ETH_RSS_L2_PAYLOAD, "l2-payload") \ + _(ETH_RSS_IPV6_EX, "ipv6-ex") \ + _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \ + _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") + +#define foreach_dpdk_rx_offload_caps \ + _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \ + _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ + _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ + _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ + _(DEV_RX_OFFLOAD_TCP_LRO , "rcp-lro") \ + _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip") + +#define foreach_dpdk_tx_offload_caps \ + _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \ + _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ + _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ + _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ + _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \ + _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \ + _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \ + _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ + _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") + +#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0) + +#define foreach_dpdk_pkt_rx_offload_flag \ + _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ + _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ + _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ + _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ + _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. 
is not OK") \ + _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ + _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") + +#define foreach_dpdk_pkt_type \ + _ (RTE_PTYPE_L3_IPV4, "Packet with IPv4 header") \ + _ (RTE_PTYPE_L3_IPV4_EXT, "Packet with extended IPv4 header") \ + _ (RTE_PTYPE_L3_IPV6, "Packet with IPv6 header") \ + _ (RTE_PTYPE_L3_IPV6_EXT, "Packet with extended IPv6 header") +#else +#define foreach_dpdk_pkt_rx_offload_flag \ + _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ + _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ + _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ + _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ + _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \ + _ (PKT_RX_IPV4_HDR, "RX packet with IPv4 header") \ + _ (PKT_RX_IPV4_HDR_EXT, "RX packet with extended IPv4 header") \ + _ (PKT_RX_IPV6_HDR, "RX packet with IPv6 header") \ + _ (PKT_RX_IPV6_HDR_EXT, "RX packet with extended IPv6 header") \ + _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ + _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") + +#define foreach_dpdk_pkt_type /* Dummy */ +#endif /* RTE_VERSION */ + +#define foreach_dpdk_pkt_tx_offload_flag \ + _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \ + _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") + +#define foreach_dpdk_pkt_offload_flag \ + foreach_dpdk_pkt_rx_offload_flag \ + foreach_dpdk_pkt_tx_offload_flag + +static inline u8 * format_dpdk_pkt_types (u8 * s, va_list * va) +{ + u32 *pkt_types = va_arg (*va, u32 *); + uword indent __attribute__((unused)) = format_get_indent (s) + 2; + + if (!*pkt_types) + return s; + + s = format (s, "Packet Types"); + +#define _(F, S) \ + if (*pkt_types & F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", \ + format_white_space, indent, #F, F, S); \ + } + + foreach_dpdk_pkt_type + +#undef _ + + return s; +} + +static inline u8 * format_dpdk_pkt_offload_flags (u8 * s, va_list * va) +{ + u16 *ol_flags = va_arg (*va, u16 *); + uword indent = format_get_indent (s) + 2; + + if (!*ol_flags) + return s; + + s = format (s, "Packet Offload Flags"); + +#define _(F, S) \ + if (*ol_flags & F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", \ + format_white_space, indent, #F, F, S); \ + } + + foreach_dpdk_pkt_offload_flag + +#undef _ + + return s; +} + +static inline u8 * format_dpdk_rte_mbuf (u8 * s, va_list * va) +{ + struct rte_mbuf * mb = va_arg (*va, struct rte_mbuf *); + uword indent = format_get_indent (s) + 2; + + s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" + "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x," + "\n%Upacket_type 0x%x", + mb->port, mb->nb_segs, mb->pkt_len, + format_white_space, indent, + mb->buf_len, mb->data_len, mb->ol_flags, + format_white_space, indent, + mb->packet_type); + + if (mb->ol_flags) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_offload_flags, &mb->ol_flags); + + if (mb->packet_type) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_types, &mb->packet_type); + return s; +} + +#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS +#define foreach_dpdk_pkt_ext_rx_offload_flag \ + _ (PKT_EXT_RX_PKT_ERROR, "RX Packet Error") \ + _ (PKT_EXT_RX_BAD_FCS, "RX Bad FCS checksum") \ + _ (PKT_EXT_RX_UDP, "RX packet with UDP L4 header") \ + _ 
(PKT_EXT_RX_TCP, "RX packet with TCP L4 header") \ + _ (PKT_EXT_RX_IPV4_FRAGMENT, "RX packet IPv4 Fragment") + +#define foreach_dpdk_pkt_ext_offload_flag \ + foreach_dpdk_pkt_rx_offload_flag \ + foreach_dpdk_pkt_ext_rx_offload_flag + +static inline u8 * format_dpdk_pkt_rx_offload_flags (u8 * s, va_list * va) +{ + u16 *ol_flags = va_arg (*va, u16 *); + uword indent = format_get_indent (s) + 2; + + if (!*ol_flags) + return s; + + s = format (s, "Packet RX Offload Flags"); + +#define _(F, S) \ + if (*ol_flags & F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", \ + format_white_space, indent, #F, F, S); \ + } + + foreach_dpdk_pkt_ext_offload_flag + +#undef _ + + return s; +} + +static inline u8 * format_dpdk_rx_rte_mbuf (u8 * s, va_list * va) +{ + struct rte_mbuf * mb = va_arg (*va, struct rte_mbuf *); + uword indent = format_get_indent (s) + 2; + + /* + * Note: Assumes mb is head of pkt chain -- port, nb_segs, & pkt_len + * are only valid for the 1st mbuf segment. + */ + s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" + "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x" + "\n%Upacket_type 0x%x", + mb->port, mb->nb_segs, mb->pkt_len, + format_white_space, indent, + mb->buf_len, mb->data_len, mb->ol_flags, + format_white_space, indent, + mb->packet_type); + + if (mb->ol_flags) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_rx_offload_flags, &mb->ol_flags); + + if (mb->packet_type) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_types, &mb->packet_type); + return s; +} +#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ + +/* These args appear by themselves */ +#define foreach_eal_double_hyphen_predicate_arg \ +_(no-shconf) \ +_(no-hpet) \ +_(no-pci) \ +_(no-huge) \ +_(vmware-tsc-map) \ +_(virtio-vhost) + +#define foreach_eal_single_hyphen_mandatory_arg \ +_(coremask, c) \ +_(nchannels, n) \ + +#define foreach_eal_single_hyphen_arg \ +_(blacklist, b) \ +_(mem-alloc-request, m) \ +_(force-ranks, r) + +/* These args are preceeded by "--" and followed by a single string */ +#define foreach_eal_double_hyphen_arg \ +_(huge-dir) \ +_(proc-type) \ +_(file-prefix) \ +_(socket-mem) \ +_(vdev) + +static inline u32 +dpdk_rx_burst ( dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) +{ + u32 n_buffers; + u32 n_left; + u32 n_this_chunk; + + n_left = VLIB_FRAME_SIZE; + n_buffers = 0; + + if (PREDICT_TRUE(xd->dev_type == VNET_DPDK_DEV_ETH)) + { + while (n_left) + { + n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, + xd->rx_vectors[queue_id] + n_buffers, n_left); + n_buffers += n_this_chunk; + n_left -= n_this_chunk; + + /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ + if (n_this_chunk < 32) + break; + } + } + else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + { + vlib_main_t * vm = vlib_get_main(); + vlib_buffer_main_t * bm = vm->buffer_main; + unsigned socket_id = rte_socket_id(); + + if (PREDICT_FALSE(!xd->vu_is_running)) + return 0; + + n_buffers = rte_vhost_dequeue_burst(&xd->vu_vhost_dev, VIRTIO_TXQ, + bm->pktmbuf_pools[socket_id], + xd->rx_vectors[queue_id], VLIB_FRAME_SIZE); + + f64 now = vlib_time_now (dm->vlib_main); + + /* send pending interrupts if needed */ + if (dpdk_vhost_user_want_interrupt(xd, VIRTIO_TXQ)) { + dpdk_vu_vring *vring = &(xd->vu_intf->vrings[VIRTIO_TXQ]); + vring->n_since_last_int += n_buffers; + + if ((vring->n_since_last_int && (vring->int_deadline < now)) + || (vring->n_since_last_int > dm->vhost_coalesce_frames)) + dpdk_vhost_user_send_interrupt(dm->vlib_main, xd, VIRTIO_TXQ); + } + + if 
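dpdk_rx_burst above fills a frame-sized vector by calling rte_eth_rx_burst repeatedly until the vector is full or the PMD returns a short chunk, which empirically means the queue is drained. A minimal standalone sketch of that accumulation loop is shown below; FRAME_SIZE and the function name stand in for VLIB_FRAME_SIZE and the real rx path.

#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define FRAME_SIZE 256

static unsigned
rx_fill_frame (uint8_t port_id, uint16_t queue_id, struct rte_mbuf ** mbufs)
{
  unsigned n_buffers = 0, n_left = FRAME_SIZE, n_this_chunk;

  while (n_left)
    {
      n_this_chunk = rte_eth_rx_burst (port_id, queue_id,
                                       mbufs + n_buffers, n_left);
      n_buffers += n_this_chunk;
      n_left -= n_this_chunk;

      /* A short burst means there is nothing more to pick up right now. */
      if (n_this_chunk < 32)
        break;
    }
  return n_buffers;
}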
(dpdk_vhost_user_want_interrupt(xd, VIRTIO_RXQ)) { + dpdk_vu_vring *vring = &(xd->vu_intf->vrings[VIRTIO_RXQ]); + if (vring->n_since_last_int && (vring->int_deadline < now)) + dpdk_vhost_user_send_interrupt(dm->vlib_main, xd, VIRTIO_RXQ); + } + + } + else if (xd->dev_type == VNET_DPDK_DEV_KNI) + { + n_buffers = rte_kni_rx_burst(xd->kni, xd->rx_vectors[queue_id], VLIB_FRAME_SIZE); + rte_kni_handle_request(xd->kni); + } + else + { + ASSERT(0); + } + + return n_buffers; +} + + +static inline void +dpdk_update_counters (dpdk_device_t * xd, f64 now) +{ + vlib_simple_counter_main_t * cm; + vnet_main_t * vnm = vnet_get_main(); + u32 my_cpu = os_get_cpu_number(); + u64 rxerrors, last_rxerrors; + int len; + + /* only update counters for PMD interfaces */ + if (xd->dev_type != VNET_DPDK_DEV_ETH) + return; + + /* + * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * therefore fake the stop in the dpdk driver by + * silently dropping all of the incoming pkts instead of + * stopping the driver / hardware. + */ + if (xd->admin_up != 0xff) + { + xd->time_last_stats_update = now ? now : xd->time_last_stats_update; + memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats)); + rte_eth_stats_get (xd->device_index, &xd->stats); + + /* maybe bump interface rx no buffer counter */ + if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_NO_BUF); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.rx_nombuf - + xd->last_stats.rx_nombuf); + } + + /* missed pkt counter */ + if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_MISS); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.imissed - + xd->last_stats.imissed); + } + rxerrors = xd->stats.ibadcrc + + xd->stats.ibadlen + xd->stats.ierrors; + last_rxerrors = xd->last_stats.ibadcrc + + xd->last_stats.ibadlen + xd->last_stats.ierrors; + + if (PREDICT_FALSE (rxerrors != last_rxerrors)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + rxerrors - last_rxerrors); + } + } + + if ((len = rte_eth_xstats_get(xd->device_index, NULL, 0)) > 0) + { + vec_validate(xd->xstats, len - 1); + len = rte_eth_xstats_get(xd->device_index, xd->xstats, vec_len(xd->xstats)); + ASSERT(vec_len(xd->xstats) == len); + _vec_len(xd->xstats) = len; + } +} diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c new file mode 100644 index 00000000000..a4b0f01475f --- /dev/null +++ b/vnet/vnet/devices/dpdk/init.c @@ -0,0 +1,1728 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
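dpdk_update_counters above works on deltas: it keeps a snapshot of the previous rte_eth_stats, fetches the current ones, and reports only the difference to the vnet simple counters. The sketch below illustrates that idea in isolation; report_rx_errors is a hypothetical consumer standing in for vlib_increment_simple_counter.

#include <stdint.h>
#include <rte_ethdev.h>

static struct rte_eth_stats last_stats;

static void report_rx_errors (uint64_t n) { (void) n; /* consume delta */ }

static void
update_counters (uint8_t port_id)
{
  struct rte_eth_stats stats;

  rte_eth_stats_get (port_id, &stats);

  if (stats.ierrors != last_stats.ierrors)
    report_rx_errors (stats.ierrors - last_stats.ierrors);

  last_stats = stats;
}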
+ */ +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> + +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/dpdk/dpdk.h> +#include <vlib/unix/physmem.h> + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <string.h> +#include <fcntl.h> + +#include "dpdk_priv.h" + +dpdk_main_t dpdk_main; + +/* force linker to link functions used by vlib and declared weak */ +void *vlib_weakly_linked_functions[] = { + &rte_pktmbuf_init, + &rte_pktmbuf_pool_init, +}; + +#define LINK_STATE_ELOGS 0 + +#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" +#define VPP_RUN_DIR "/run/vpp" + +/* Port configuration, mildly modified Intel app values */ + +static struct rte_eth_conf port_conf_template = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .hw_strip_crc = 1, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +clib_error_t * +dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_buffer_main_t * bm = vm->buffer_main; + int rv; + int j; + + ASSERT(os_get_cpu_number() == 0); + + if (xd->admin_up) { + vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + } + + rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, + xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + /* Set up one TX-queue per worker thread */ + for (j = 0; j < xd->tx_q_used; j++) + { + rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc, + xd->cpu_socket, &xd->tx_conf); + if (rv < 0) + break; + } + + if (rv < 0) + return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", + xd->device_index, rv); + + for (j = 0; j < xd->rx_q_used; j++) + { + + rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc, + xd->cpu_socket, 0, + bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]); + if (rv < 0) + return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", + xd->device_index, rv); + } + + if (xd->admin_up) { + rte_eth_dev_start (xd->device_index); + } + return 0; +} + +static u32 dpdk_flag_change (vnet_main_t * vnm, + vnet_hw_interface_t * hi, + u32 flags) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); + u32 old = 0; + + if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags)) + { + old = xd->promisc; + xd->promisc = flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL; + + if (xd->admin_up) + { + if (xd->promisc) + rte_eth_promiscuous_enable(xd->device_index); + else + rte_eth_promiscuous_disable(xd->device_index); + } + } + else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags)) + { + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_VICE || + xd->pmd == VNET_DPDK_PMD_ENIC) + { + struct rte_eth_dev_info dev_info; + + /* + * Restore mtu to what has been set by CIMC in the firmware cfg. 
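dpdk_port_setup above follows the standard DPDK bring-up order: configure the port, set up every tx and rx queue, then start it. A condensed sketch of that order is shown below; the descriptor count, socket id and mempool handling are illustrative simplifications (the real code uses per-queue sockets and per-socket pktmbuf pools).

#include <rte_ethdev.h>
#include <rte_mempool.h>

static int
port_bring_up (uint8_t port, uint16_t n_rxq, uint16_t n_txq,
               const struct rte_eth_conf * conf, struct rte_mempool * mp)
{
  int rv, q;

  rv = rte_eth_dev_configure (port, n_rxq, n_txq, conf);
  if (rv < 0)
    return rv;

  for (q = 0; q < n_txq; q++)
    if ((rv = rte_eth_tx_queue_setup (port, q, 512 /* descriptors */,
                                      SOCKET_ID_ANY, NULL)) < 0)
      return rv;

  for (q = 0; q < n_rxq; q++)
    if ((rv = rte_eth_rx_queue_setup (port, q, 512 /* descriptors */,
                                      SOCKET_ID_ANY, NULL, mp)) < 0)
      return rv;

  return rte_eth_dev_start (port);
}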
+ */ + rte_eth_dev_info_get(xd->device_index, &dev_info); + hi->max_packet_bytes = dev_info.max_rx_pktlen; + + vlib_cli_output (vlib_get_main(), + "Cisco VIC mtu can only be changed " + "using CIMC then rebooting the server!"); + } + else + { + int rv; + + /* + * DAW-FIXME: The DPDK VMXNET3 driver does not currently support + * multi-buffer packets. Max out at 1518 bytes for now. + * + * If/when the driver gets fixed, then this should be + * removed. + */ + if ((xd->pmd == VNET_DPDK_PMD_VMXNET3) && + (hi->max_packet_bytes > 1518)) + { + hi->max_packet_bytes = 1518; + + vlib_cli_output (vlib_get_main(), + "VMXNET3 driver does not support jumbo frames " + "yet -- setting mtu to 1518!"); + } + + xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; + + if (xd->admin_up) + rte_eth_dev_stop (xd->device_index); + + rv = rte_eth_dev_configure + (xd->device_index, + xd->rx_q_used, + xd->tx_q_used, + &xd->port_conf); + + if (rv < 0) + vlib_cli_output (vlib_get_main(), + "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + rte_eth_dev_set_mtu(xd->device_index, hi->max_packet_bytes); + + if (xd->admin_up) + rte_eth_dev_start (xd->device_index); + } + } + return old; +} + +#ifdef NETMAP +extern int rte_netmap_probe(void); +#endif + +static clib_error_t * +dpdk_lib_init (dpdk_main_t * dm) +{ + u32 nports; + u32 nb_desc = 0; + int i; + clib_error_t * error; + vlib_main_t * vm = vlib_get_main(); + vlib_thread_main_t * tm = vlib_get_thread_main(); + vnet_sw_interface_t * sw; + vnet_hw_interface_t * hi; + dpdk_device_t * xd; + vlib_thread_registration_t * tr; + uword * p; + + u32 next_cpu = 0; + u8 af_packet_port_id = 0; + + dm->input_cpu_first_index = 0; + dm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "io"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (!tr || tr->count == 0) + { + /* no io threads, workers doing input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + } + else + { + dm->have_io_threads = 1; + } + + if (tr && tr->count > 0) + { + dm->input_cpu_first_index = tr->first_index; + dm->input_cpu_count = tr->count; + } + + vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + +#ifdef NETMAP + if(rte_netmap_probe() < 0) + return clib_error_return (0, "rte netmap probe failed"); +#endif + + nports = rte_eth_dev_count(); + if (nports < 1) + { + clib_warning ("DPDK drivers found no ports..."); + } + + if (CLIB_DEBUG > 0) + clib_warning ("DPDK drivers found %d ports...", nports); + + /* + * All buffers are all allocated from the same rte_mempool. + * Thus they all have the same number of data bytes. 
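The MTU branch of dpdk_flag_change above stops a running port, reconfigures it with the new maximum frame size, pushes the MTU to the PMD and restarts it. The sketch below condenses that sequence under simplified assumptions (no VIC/VMXNET3 special cases, new_mtu used directly as max_rx_pkt_len); it is an illustration, not the exact VPP flow.

#include <rte_ethdev.h>

static int
change_port_mtu (uint8_t port, struct rte_eth_conf * conf,
                 uint16_t n_rxq, uint16_t n_txq, uint16_t new_mtu)
{
  int rv;

  conf->rxmode.max_rx_pkt_len = new_mtu;   /* plus L2 overhead in practice */

  rte_eth_dev_stop (port);

  rv = rte_eth_dev_configure (port, n_rxq, n_txq, conf);
  if (rv < 0)
    return rv;

  rte_eth_dev_set_mtu (port, new_mtu);

  return rte_eth_dev_start (port);
}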
+ */ + dm->vlib_buffer_free_list_index = + vlib_buffer_get_or_create_free_list ( + vm, VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, "dpdk rx"); + + for (i = 0; i < nports; i++) + { + u8 addr[6]; + int j; + struct rte_eth_dev_info dev_info; + clib_error_t * rv; + struct rte_eth_link l; + + /* Create vnet interface */ + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + xd->cpu_socket = (i8) rte_eth_dev_socket_id(i); + rte_eth_dev_info_get(i, &dev_info); + + memcpy(&xd->tx_conf, &dev_info.default_txconf, + sizeof(struct rte_eth_txconf)); + if (dm->no_multi_seg) + { + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 0; + } + else + { + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 1; + } + + memcpy(&xd->port_conf, &port_conf_template, sizeof(struct rte_eth_conf)); + + xd->tx_q_used = dev_info.max_tx_queues < tm->n_vlib_mains ? + 1 : tm->n_vlib_mains; + + if (dm->use_rss > 1 && dev_info.max_rx_queues >= dm->use_rss) + { + xd->rx_q_used = dm->use_rss; + xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + } + else + xd->rx_q_used = 1; + + xd->dev_type = VNET_DPDK_DEV_ETH; + if (!xd->pmd) { + + +#define _(s,f) else if (!strcmp(dev_info.driver_name, s)) \ + xd->pmd = VNET_DPDK_PMD_##f; + if (0) + ; + foreach_dpdk_pmd +#undef _ + else + xd->pmd = VNET_DPDK_PMD_UNKNOWN; + + + switch (xd->pmd) { + /* 1G adapters */ + case VNET_DPDK_PMD_E1000EM: + case VNET_DPDK_PMD_IGB: + case VNET_DPDK_PMD_IGBVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + break; + + /* 10G adapters */ + case VNET_DPDK_PMD_IXGBE: + case VNET_DPDK_PMD_IXGBEVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; + break; + + /* Cisco VIC */ + case VNET_DPDK_PMD_VICE: + case VNET_DPDK_PMD_ENIC: + rte_eth_link_get_nowait(xd->device_index, &l); + if (l.link_speed == 40000) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; + } + else + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; + } + break; + + /* Intel Fortville */ + case VNET_DPDK_PMD_I40E: + case VNET_DPDK_PMD_I40EVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; + + switch (dev_info.pci_dev->id.device_id) { + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_SFP_XL710: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case I40E_DEV_ID_QSFP_A: + case I40E_DEV_ID_QSFP_B: + case I40E_DEV_ID_QSFP_C: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case I40E_DEV_ID_VF: + rte_eth_link_get_nowait(xd->device_index, &l); + xd->port_type = l.link_speed == 10000 ? 
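The PMD classification above reuses the foreach_dpdk_pmd X-macro: each _(s, f) entry expands into an else-if that compares the driver name reported by rte_eth_dev_info_get against the string s and assigns VNET_DPDK_PMD_##f. The standalone sketch below shows the same technique with a shortened, example-only list.

#include <string.h>

#define foreach_example_pmd        \
  _ ("rte_ixgbe_pmd",   IXGBE)     \
  _ ("rte_i40e_pmd",    I40E)      \
  _ ("rte_vmxnet3_pmd", VMXNET3)

typedef enum {
  EXAMPLE_PMD_NONE,
#define _(s, f) EXAMPLE_PMD_##f,
  foreach_example_pmd
#undef _
  EXAMPLE_PMD_UNKNOWN,
} example_pmd_t;

static example_pmd_t
pmd_from_driver_name (const char * driver_name)
{
  if (0)
    ;
#define _(s, f) else if (!strcmp (driver_name, s)) return EXAMPLE_PMD_##f;
  foreach_example_pmd
#undef _
  return EXAMPLE_PMD_UNKNOWN;
}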
+ VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + /* Intel Red Rock Canyon */ + case VNET_DPDK_PMD_FM10K: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; + xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; + break; + + /* virtio */ + case VNET_DPDK_PMD_VIRTIO: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; + xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; + break; + + /* vmxnet3 */ + case VNET_DPDK_PMD_VMXNET3: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + break; + + case VNET_DPDK_PMD_AF_PACKET: + xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; + xd->af_packet_port_id = af_packet_port_id++; + break; + + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + + #ifdef NETMAP + if(strncmp(dev_info.driver_name, "vale", 4) == 0 + || strncmp(dev_info.driver_name, "netmap", 6) == 0) + { + xd->pmd = VNET_DPDK_PMD_NETMAP; + xd->port_type = VNET_DPDK_PORT_TYPE_NETMAP; + } + #endif + + } + + /* + * Ensure default mtu is not > the mtu read from the hardware. + * Otherwise rte_eth_dev_configure() will fail and the port will + * not be available. + */ + xd->port_conf.rxmode.max_rx_pkt_len = + (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) ? + dev_info.max_rx_pktlen : ETHERNET_MAX_PACKET_BYTES; + + /* + * DAW-FIXME: VMXNET3 driver doesn't support jumbo / multi-buffer pkts + */ + if (xd->pmd == VNET_DPDK_PMD_VMXNET3) + { + xd->port_conf.rxmode.max_rx_pkt_len = 1518; + xd->port_conf.rxmode.jumbo_frame = 0; + } + + if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) + { + f64 now = vlib_time_now(vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + memcpy (addr+2, &rnd, sizeof(rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } + else + rte_eth_macaddr_get(i,(struct ether_addr *)addr); + + if (xd->tx_q_used < tm->n_vlib_mains) + { + xd->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) xd->lockp, 0, CLIB_CACHE_LINE_BYTES); + } + + xd->device_index = xd - dm->devices; + ASSERT(i == xd->device_index); + xd->per_interface_next_index = ~0; + + /* assign interface to input thread */ + dpdk_device_and_queue_t * dq; + int q; + + for (q = 0; q < xd->rx_q_used; q++) + { + int cpu = dm->input_cpu_first_index + next_cpu; + unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id; + + /* + * numa node for worker thread handling this queue + * needed for taking buffers from the right mempool + */ + vec_validate(xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + + /* + * construct vector of (device,queue) pairs for each worker thread + */ + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q; + + next_cpu++; + if (next_cpu == dm->input_cpu_count) + next_cpu = 0; + } + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + sizeof(tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j< xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE-1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + + vec_validate_aligned (xd->frames, tm->n_vlib_mains, + 
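For af_packet (and later KNI) ports the code above synthesizes a locally administered MAC address: a fixed 02:fe prefix plus four pseudo-random bytes derived from the current time. The sketch below shows the same construction with plain libc calls standing in for vlib_time_now and random_u32.

#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

static void
make_local_mac (uint8_t addr[6])
{
  uint32_t rnd;

  srand ((unsigned) time (NULL));   /* stand-in for vlib_time_now + random_u32 */
  rnd = (uint32_t) rand ();

  addr[0] = 2;      /* locally administered, unicast */
  addr[1] = 0xfe;
  memcpy (addr + 2, &rnd, sizeof (rnd));
}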
CLIB_CACHE_LINE_BYTES); + + rv = dpdk_port_setup(dm, xd); + + if (rv < 0) + return rv; + + /* count the number of descriptors used for this device */ + nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; + + error = ethernet_register_interface + (dm->vnet_main, + dpdk_device_class.index, + xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, + dpdk_flag_change); + if (error) + return error; + + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_VICE || + xd->pmd == VNET_DPDK_PMD_ENIC) + { + /* + * Initialize mtu to what has been set by CIMC in the firmware cfg. + */ + hi->max_packet_bytes = dev_info.max_rx_pktlen; + /* + * remove vlan tag from VIC port to fix VLAN0 issue. + * TODO Handle VLAN tagged traffic + */ + int vlan_off; + vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index); + vlan_off |= ETH_VLAN_STRIP_OFFLOAD; + rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off); + } + /* + * DAW-FIXME: VMXNET3 driver doesn't support jumbo / multi-buffer pkts + */ + else if (xd->pmd == VNET_DPDK_PMD_VMXNET3) + hi->max_packet_bytes = 1518; + + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = + xd->port_conf.rxmode.max_rx_pkt_len - sizeof(ethernet_header_t); + + rte_eth_dev_set_mtu(xd->device_index, hi->max_packet_bytes); + } + + if (dm->num_kni) { + clib_warning("Initializing KNI interfaces..."); + rte_kni_init(dm->num_kni); + for (i = 0; i < dm->num_kni; i++) + { + u8 addr[6]; + int j; + + /* Create vnet interface */ + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->dev_type = VNET_DPDK_DEV_KNI; + + xd->device_index = xd - dm->devices; + ASSERT(nports + i == xd->device_index); + xd->per_interface_next_index = ~0; + xd->kni_port_id = i; + xd->cpu_socket = -1; + hash_set (dm->dpdk_device_by_kni_port_id, i, xd - dm->devices); + xd->rx_q_used = 1; + + /* assign interface to input thread */ + dpdk_device_and_queue_t * dq; + vec_add2(dm->devices_by_cpu[dm->input_cpu_first_index], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + sizeof(tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j< xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE-1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + + vec_validate_aligned (xd->frames, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + /* FIXME Set up one TX-queue per worker thread */ + + { + f64 now = vlib_time_now(vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (addr+2, &rnd, sizeof(rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } + + error = ethernet_register_interface + (dm->vnet_main, + dpdk_device_class.index, + xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, + dpdk_flag_change); + + if (error) + return error; + + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index 
= sw->sw_if_index; + hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + } + } + + if (nb_desc > dm->num_mbufs) + clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", + dm->num_mbufs, nb_desc); + + /* init next vhost-user if index */ + dm->next_vu_if_id = 0; + + return 0; +} + +/* + * Tell the vlib physical memory allocator that we've handled + * the initialization. We don't actually do so until + * vlib_main(...) callls the dpdk config function. + */ +int vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, + int physmem_required) +{ + return 1; +} + +static clib_error_t * +write_sys_fs (char * file_name, char * fmt, ...) +{ + u8 * s; + int fd; + + fd = open (file_name, O_WRONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + va_list va; + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + vec_add1 (s, 0); // terminate c string + + if (write (fd, s, vec_len (s)) < 0) + return clib_error_return_unix (0, "write '%s' to '%s'", s, file_name); + + vec_free (s); + close (fd); + return 0; +} + +#define VIRTIO_PCI_NAME "virtio-pci" + +static clib_error_t * dpdk_bind_eth_kernel_drivers (vlib_main_t * vm, + char * pci_dev_id, + char * kernel_driver) +{ + dpdk_main_t * dm = &dpdk_main; + unformat_input_t _in; + unformat_input_t * in = &_in; + clib_error_t * error = 0; + u8 * line = 0, * modcmd = 0, * path = 0; + u8 * pci_vid = 0, *pci_did = 0, * devname = 0; + char *driver_name = kernel_driver; + FILE * fp; + + /* + * Bail out now if we're not running as root. + * This allows non-privileged use of the packet generator, etc. + */ + if (geteuid() != 0) + return 0; + + /* + * Get all ethernet pci device numbers for the device type specified. + */ + modcmd = format (0, "lspci -nDd %s | grep 0200 | " + "awk '{ print $1, $3 }'%c", pci_dev_id, 0); + if ((fp = popen ((const char *)modcmd, "r")) == NULL) + { + error = clib_error_return_unix (0, + "Unable to get %s ethernet pci devices.", + pci_dev_id); + goto done; + } + + vec_validate (line, BUFSIZ); + vec_validate (path, BUFSIZ); + while (fgets ((char *)line, BUFSIZ, fp) != NULL) + { + struct stat st; + u8 bind_uio = 1; + line[strlen ((char *)line) - 1] = 0; // chomp trailing newline. + + unformat_init_string (in, (char *)line, strlen((char *)line) + 1); + unformat(in, "%s %s:%s", &devname, &pci_vid, &pci_did); + unformat_free (in); + + /* + * Blacklist all ethernet interfaces in the + * linux IP routing tables (route --inet --inet6) + */ + if (strstr ((char *)dm->eth_if_blacklist, (char *)devname)) + continue; + + /* + * If there are any devices whitelisted, then blacklist all devices + * which are not explicitly whitelisted. + */ + if (dm->eth_if_whitelist && + !strstr ((char *)dm->eth_if_whitelist, (char *)devname)) + continue; + +#ifdef NETMAP + /* + * Optimistically open the device as a netmap device. + */ + if (eth_nm_open((char *)devname)) + continue; +#endif + + _vec_len (path) = 0; + path = format (path, "/sys/bus/pci/devices/%s/driver/unbind%c", + devname, 0); + + /* + * If the device is bound to a driver... + */ + if (stat ((const char *)path, &st) == 0) + { + u8 * device_path; + + /* + * If the interface is not a virtio... + */ + if (!driver_name || strcmp(driver_name, VIRTIO_PCI_NAME)) + { + /* + * If it is already bound to driver, don't unbind/bind it. 
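dpdk_bind_eth_kernel_drivers above detaches each selected NIC from its current kernel driver and hands it to igb_uio by writing the PCI address into sysfs, via write_sys_fs. The sketch below shows that bind/unbind dance in isolation; the paths and device address are illustrative, and error handling is reduced to the minimum.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

static int
write_string (const char * path, const char * s)
{
  int fd = open (path, O_WRONLY);
  if (fd < 0)
    return -1;
  int rv = (write (fd, s, strlen (s)) < 0) ? -1 : 0;
  close (fd);
  return rv;
}

static int
rebind_to_uio (const char * pci_addr /* e.g. "0000:02:00.0" */)
{
  char path[128];

  /* 1. detach from whatever kernel driver currently owns the device */
  snprintf (path, sizeof (path),
            "/sys/bus/pci/devices/%s/driver/unbind", pci_addr);
  (void) write_string (path, pci_addr);   /* may fail if already unbound */

  /* 2. attach it to igb_uio so DPDK can take it over */
  return write_string ("/sys/bus/pci/drivers/igb_uio/bind", pci_addr);
}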
+ */ + device_path = format (0, "/sys/bus/pci/drivers/%s/%s/device%c", + driver_name, devname, 0); + if (stat ((const char *)device_path, &st) == 0) + bind_uio = 0; + + vec_free (device_path); + } + + /* + * unbind it from the current driver + */ + if (bind_uio) + { + _vec_len (path) -= 1; + path = format (path, "%c", 0); + error = write_sys_fs ((char *)path, "%s", devname); + if (error) + goto done; + } + } + + /* + * DAW-FIXME: The following bind/unbind dance is necessary for the dpdk + * virtio poll-mode driver to work. + */ + + if (driver_name && !strcmp(driver_name, VIRTIO_PCI_NAME)) + { + /* + * bind interface to the native kernel module + */ + _vec_len (path) = 0; + path = format (path, "/sys/bus/pci/drivers/%s/bind%c", + driver_name, 0); + error = write_sys_fs ((char *)path, "%s", devname); + if (error) + goto done; + + /* + * unbind interface from the native kernel module + */ + _vec_len (path) -= 5; + path = format (path, "unbind%c", 0); + error = write_sys_fs ((char *)path, "%s", devname); + if (error) + goto done; + } + + /* + * bind the interface to igb_uio + */ + if (bind_uio) + { + int pci_vendor_id = strtol((char *) pci_vid, NULL, 16); + int pci_device_id = strtol((char *) pci_did, NULL, 16); + + /* + * Set PCI ID to ".../virtio-pci/new_id" for Intel fortvile adapaters + */ + if (pci_vendor_id == 0x8086 && + (pci_device_id == I40E_DEV_ID_10G_BASE_T || + pci_device_id == I40E_DEV_ID_SFP_XL710 || + pci_device_id == I40E_DEV_ID_QSFP_A || + pci_device_id == I40E_DEV_ID_QSFP_B || + pci_device_id == I40E_DEV_ID_QSFP_C)) + { + _vec_len (path) = 0; + path = format (path, "/sys/bus/pci/drivers/%s/new_id%c", driver_name, 0); + error = write_sys_fs ((char *) path, "%s %s", pci_vid, pci_did); + if (error) + continue; + } + + _vec_len (path) = 0; + path = format (path, "/sys/bus/pci/drivers/%s/bind%c", driver_name, 0); + error = write_sys_fs ((char *) path, "%s", devname); + if (error) + { + error = 0; + continue; + } + } + } + + done: + vec_free (line); + vec_free (path); + vec_free (devname); + vec_free (pci_vid); + vec_free (pci_did); + vec_free (modcmd); + pclose (fp); + return error; +} + +static uword +unformat_socket_mem (unformat_input_t * input, va_list * va) +{ + uword ** r = va_arg (* va, uword **); + int i = 0; + u32 mem; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, ",")) + hash_set (*r, i, 1024); + else if (unformat (input, "%u,", &mem)) + hash_set (*r, i, mem); + else if (unformat (input, "%u", &mem)) + hash_set (*r, i, mem); + else + { + unformat_put_input (input); + goto done; + } + i++; + } + +done: + return 1; +} + +static u32 +get_node_free_hugepages_num (u32 node, u32 page_size) +{ + FILE * fp; + u8 * tmp; + + tmp = format (0, "/sys/devices/system/node/node%u/hugepages/hugepages-%ukB/" + "free_hugepages%c", node, page_size, 0); + fp = fopen ((char *) tmp, "r"); + vec_free(tmp); + + if (fp != NULL) + { + u8 * buffer = 0; + u32 pages_avail = 0; + + vec_validate (buffer, 256-1); + if (fgets ((char *)buffer, 256, fp)) + { + unformat_input_t in; + unformat_init_string (&in, (char *) buffer, strlen ((char *) buffer)); + unformat(&in, "%u", &pages_avail); + unformat_free (&in); + } + vec_free(buffer); + fclose(fp); + return pages_avail; + } + + return 0; +} + +static clib_error_t * +dpdk_config (vlib_main_t * vm, unformat_input_t * input) +{ + clib_error_t * error = 0; + dpdk_main_t * dm = &dpdk_main; + vlib_thread_main_t * tm = vlib_get_thread_main(); + u8 * s, * tmp = 0; + u8 * pci_dev_id = 0; + u8 * rte_cmd = 0, * ethname = 
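unformat_socket_mem above parses the EAL "socket-mem" value: a comma-separated list of per-NUMA-node megabyte counts, where an empty field defaults to 1024 MB. The plain-C sketch below mirrors that behaviour without the unformat machinery; MAX_SOCKETS and the function name are assumptions for the illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_SOCKETS 8

static void
parse_socket_mem (const char * arg, unsigned mem_mb[MAX_SOCKETS])
{
  char buf[256];
  unsigned socket = 0;

  snprintf (buf, sizeof (buf), "%s", arg);

  for (char * p = buf; socket < MAX_SOCKETS; socket++)
    {
      char * comma = strchr (p, ',');
      if (comma)
        *comma = 0;
      /* empty field => 1024 MB, matching the "," case above */
      mem_mb[socket] = (*p == 0) ? 1024 : (unsigned) atoi (p);
      if (!comma)
        break;
      p = comma + 1;
    }
}

int main (void)
{
  unsigned mem[MAX_SOCKETS] = { 0 };
  parse_socket_mem ("1024,,512", mem);
  printf ("%u %u %u\n", mem[0], mem[1], mem[2]);   /* 1024 1024 512 */
  return 0;
}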
0; + FILE * rte_fp; + u32 log_level; + int ret, i; + char * fmt; +#ifdef NETMAP + int rxrings, txrings, rxslots, txslots, txburst; + char * nmnam; +#endif + unformat_input_t _in; + unformat_input_t * in = &_in; + u8 no_pci = 0; + u8 no_huge = 0; + u8 huge_dir = 0; + u8 file_prefix = 0; + u8 * socket_mem = 0; + + // MATT-FIXME: inverted virtio-vhost logic to use virtio by default + dm->use_virtio_vhost = 1; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + /* Prime the pump */ + if (unformat (input, "no-hugetlb")) + { + vec_add1 (dm->eal_init_args, (u8 *) "no-huge"); + no_huge = 1; + } + + else if (unformat (input, "decimal-interface-names")) + dm->interface_name_format_decimal = 1; + + else if (unformat (input, "no-multi-seg")) + dm->no_multi_seg = 1; + + else if (unformat (input, "dev %s", &pci_dev_id)) + { + if (dm->eth_if_whitelist) + { + /* + * Don't add duplicate device id's. + */ + if (strstr ((char *)dm->eth_if_whitelist, (char *)pci_dev_id)) + continue; + + _vec_len (dm->eth_if_whitelist) -= 1; // chomp trailing NULL. + dm->eth_if_whitelist = format (dm->eth_if_whitelist, " %s%c", + pci_dev_id, 0); + } + else + dm->eth_if_whitelist = format (0, "%s%c", pci_dev_id, 0); + } + +#ifdef NETMAP + else if (unformat(input, "netmap %s/%d:%d/%d:%d/%d", + &nmname, &rxrings, &rxslots, &txrings, &txslots, &txburst)) { + char * rv; + rv = (char *) + eth_nm_args(nmname, rxrings, rxslots, txrings, txslots, txburst); + if (rv) { + error = clib_error_return (0, "%s", rv); + goto done; + } + }else if (unformat(input, "netmap %s", &nmname)) { + char * rv; + rv = (char *) + eth_nm_args(nmname, 0, 0, 0, 0, 0); + if (rv) { + error = clib_error_return (0, "%s", rv); + goto done; + } + } +#endif + + else if (unformat (input, "num-mbufs %d", &dm->num_mbufs)) + ; + else if (unformat (input, "kni %d", &dm->num_kni)) + ; + else if (unformat (input, "uio-driver %s", &dm->uio_driver_name)) + ; + else if (unformat (input, "vhost-user-coalesce-frames %d", &dm->vhost_coalesce_frames)) + ; + else if (unformat (input, "vhost-user-coalesce-time %f", &dm->vhost_coalesce_time)) + ; + else if (unformat (input, "enable-vhost-user")) + dm->use_virtio_vhost = 0; + else if (unformat (input, "rss %d", &dm->use_rss)) + ; + +#define _(a) \ + else if (unformat(input, #a)) \ + { \ + if (!strncmp(#a, "no-pci", 6)) \ + no_pci = 1; \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (dm->eal_init_args, tmp); \ + } + foreach_eal_double_hyphen_predicate_arg +#undef _ + +#define _(a) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + if (!strncmp(#a, "huge-dir", 8)) \ + huge_dir = 1; \ + else if (!strncmp(#a, "file-prefix", 11)) \ + file_prefix = 1; \ + else if (!strncmp(#a, "socket-mem", 10)) \ + socket_mem = vec_dup (s); \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (dm->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (dm->eal_init_args, s); \ + } + foreach_eal_double_hyphen_arg +#undef _ + +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (dm->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (dm->eal_init_args, s); \ + } + foreach_eal_single_hyphen_arg +#undef _ + +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (dm->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (dm->eal_init_args, s); \ + dm->a##_set_manually = 1; \ + } + foreach_eal_single_hyphen_mandatory_arg +#undef _ + + else if (unformat(input, "default")) + ; + + else + { + error = 
clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (!dm->uio_driver_name) + dm->uio_driver_name = format (0, "igb_uio"); + + /* + * Use 1G huge pages if available. + */ + if (!no_huge && !huge_dir) + { + uword * mem_by_socket = hash_create (0, sizeof (uword)); + uword c; + u8 use_1g = 1; + u8 use_2m = 1; + int rv; + + umount(DEFAULT_HUGE_DIR); + + /* Process "socket-mem" parameter value */ + if (vec_len (socket_mem)) + { + unformat_input_t in; + unformat_init_vector(&in, socket_mem); + unformat(&in, "%U", unformat_socket_mem, &mem_by_socket); + unformat_free(&in); + } + else + use_1g = 0; + + /* check if available enough 1GB pages for each socket */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ({ + uword * p = hash_get (mem_by_socket, c); + if (p) + { + u32 mem = p[0]; + if (mem) + { + u32 pages_num_1g = mem / 1024; + u32 pages_num_2m = mem / 2; + u32 pages_avail; + + pages_avail = get_node_free_hugepages_num(c, 1048576); + if (!(pages_avail >= pages_num_1g)) + use_1g = 0; + + pages_avail = get_node_free_hugepages_num(c, 2048); + if (!(pages_avail >= pages_num_2m)) + use_2m = 0; + } + } + })); + + hash_free (mem_by_socket); + + rv = mkdir(VPP_RUN_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + VPP_RUN_DIR, errno); + goto done; + } + + rv = mkdir(DEFAULT_HUGE_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + DEFAULT_HUGE_DIR, errno); + goto done; + } + + if (use_1g) + { + rv = mount("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); + } + else if (use_2m) + { + rv = mount("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); + } + else + { + return clib_error_return (0, "not enough free huge pages"); + } + + if (rv) + { + error = clib_error_return (0, "mount failed %d", errno); + goto done; + } + + tmp = format (0, "--huge-dir%c", 0); + vec_add1 (dm->eal_init_args, tmp); + tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); + vec_add1 (dm->eal_init_args, tmp); + if (!file_prefix) + { + tmp = format (0, "--file-prefix%c", 0); + vec_add1 (dm->eal_init_args, tmp); + tmp = format (0, "vpp%c", 0); + vec_add1 (dm->eal_init_args, tmp); + } + } + + /* + * Blacklist all ethernet interfaces in the linux IP routing tables. + */ + dm->eth_if_blacklist = format (0, "%c", 0); + rte_cmd = format (0, "route --inet --inet6 -n|awk '{print $7}'|sort -u|" + "egrep $(echo $(ls -1d /sys/class/net/*/device|" + "cut -d/ -f5)|sed -s 's/ /|/g')%c", 0); + if ((rte_fp = popen ((const char *)rte_cmd, "r")) == NULL) + { + error = clib_error_return_unix (0, "Unable to find blacklist ethernet" + " interface(s) in linux routing tables."); + goto rte_cmd_err; + + } + + vec_validate (ethname, BUFSIZ); + while (fgets ((char *)ethname, BUFSIZ, rte_fp) != NULL) + { + FILE *rlnk_fp; + u8 * rlnk_cmd = 0, * devname = 0; + + ethname[strlen ((char *)ethname) - 1] = 0; // chomp trailing newline. + + rlnk_cmd = format (0, "readlink /sys/class/net/%s%c", + ethname, 0); + + if ((rlnk_fp = popen ((const char *)rlnk_cmd, "r")) == NULL) + { + error = clib_error_return_unix (0, "Unable to read %s link.", + ethname); + goto rlnk_cmd_err; + } + + vec_validate (devname, BUFSIZ); + while (fgets ((char *)devname, BUFSIZ, rlnk_fp) != NULL) + { + char * pci_id = 0; + + /* + * Extract the device PCI ID name from the link. It is the first + * PCI ID searching backwards from the end of the link pathname. 
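+ * (A PCI ID in sysfs has the canonical form dddd:bb:dd.f, i.e.
+ * domain:bus:device.function, which is what the separator checks below verify.)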
+ * For example: + * readlink /sys/class/net/eth0 + * ../../devices/pci0000:00/0000:00:0a.0/virtio4/net/eth0 + */ + for (pci_id = (char *)((devname + strlen((char *)devname))); + ((u8 *)pci_id > devname) && *pci_id != '.'; pci_id--) + ; + + /* + * Verify that the field found is a valid PCI ID. + */ + if ((*(pci_id - 1) == '.') || ((u8 *)(pci_id - 11) < devname) || + (*(pci_id - 11) != '/') || (*(pci_id - 3) != ':') || + (*(pci_id - 6) != ':')) + { + devname[strlen ((char *)devname) - 1] = 0; // chomp trailing newline. + clib_warning ("Unable to extract %s PCI ID (0x%llx \"%s\") " + "from 0x%llx \"%s\"", ethname, pci_id, pci_id, + devname, devname); + continue; + } + + pci_id[2] = 0; + pci_id -= 10; + + /* Don't blacklist any interfaces which have been whitelisted. + */ + if (dm->eth_if_whitelist && + strstr ((char *)dm->eth_if_whitelist, (char *)pci_id)) + continue; + + _vec_len (dm->eth_if_blacklist) -= 1; // chomp trailing NULL. + dm->eth_if_blacklist = format (dm->eth_if_blacklist, " %s%c", + pci_id, 0); + } + + rlnk_cmd_err: + pclose (rlnk_fp); + vec_free (rlnk_cmd); + vec_free (devname); + } + + rte_cmd_err: + pclose (rte_fp); + vec_free (rte_cmd); + vec_free (ethname); + + if (error) + return error; + + /* I'll bet that -c and -n must be the first and second args... */ + if (!dm->coremask_set_manually) + { + vlib_thread_registration_t * tr; + uword coremask; + int i; + + /* main thread core */ + coremask = 1 << tm->main_lcore; + + for (i = 0; i < vec_len (tm->registrations); i++) + { + tr = tm->registrations[i]; + if (clib_bitmap_is_zero(tr->coremask)) + continue; + coremask |= tr->coremask[0]; + } + + vec_insert (dm->eal_init_args, 2, 1); + dm->eal_init_args[1] = (u8 *) "-c"; + tmp = format (0, "%x%c", coremask, 0); + dm->eal_init_args[2] = tmp; + } + + if (!dm->nchannels_set_manually) + { + vec_insert (dm->eal_init_args, 2, 3); + dm->eal_init_args[3] = (u8 *) "-n"; + tmp = format (0, "%d", dm->nchannels); + dm->eal_init_args[4] = tmp; + } + + /* + * If there are whitelisted devices, + * add the whitelist option & device list to the dpdk arg list... + */ + if (dm->eth_if_whitelist) + { + unformat_init_string (in, (char *)dm->eth_if_whitelist, + vec_len(dm->eth_if_whitelist) - 1); + fmt = "-w%c"; + } + + /* + * Otherwise add the blacklisted devices to the dpdk arg list. + */ + else + { + unformat_init_string (in, (char *)dm->eth_if_blacklist, + vec_len(dm->eth_if_blacklist) - 1); + fmt = "-b%c"; + } + + while (unformat_check_input (in) != UNFORMAT_END_OF_INPUT) + { + tmp = format (0, fmt, 0); + vec_add1 (dm->eal_init_args, tmp); + unformat (in, "%s", &pci_dev_id); + vec_add1 (dm->eal_init_args, pci_dev_id); + } + + if (no_pci == 0) + { + /* + * Bind Virtio pci devices to the igb_uio kernel driver. + */ + error = dpdk_bind_eth_kernel_drivers (vm, "1af4:1000", VIRTIO_PCI_NAME); + if (error) + return error; + + /* + * Bind vmxnet3 pci devices to the igb_uio kernel driver. + */ + error = dpdk_bind_eth_kernel_drivers (vm, "15ad:07b0", + (char *) dm->uio_driver_name); + if (error) + return error; + + /* + * Bind Intel ethernet pci devices to igb_uio kernel driver. + */ + error = dpdk_bind_eth_kernel_drivers (vm, "8086:", + (char *) dm->uio_driver_name); + /* + * Bind Cisco VIC ethernet pci devices to igb_uio kernel driver. 
+ */ + error = dpdk_bind_eth_kernel_drivers (vm, "1137:0043", + (char *) dm->uio_driver_name); + } + + /* set master-lcore */ + tmp = format (0, "--master-lcore%c", 0); + vec_add1 (dm->eal_init_args, tmp); + tmp = format (0, "%u%c", tm->main_lcore, 0); + vec_add1 (dm->eal_init_args, tmp); + + /* NULL terminate the "argv" vector, in case of stupidity */ + vec_add1 (dm->eal_init_args, 0); + _vec_len(dm->eal_init_args) -= 1; + + /* Set up DPDK eal and packet mbuf pool early. */ + + log_level = (CLIB_DEBUG > 0) ? RTE_LOG_DEBUG : RTE_LOG_NOTICE; + + rte_set_log_level (log_level); + + vm = dm->vlib_main; + + ret = rte_eal_init(vec_len(dm->eal_init_args), (char **) dm->eal_init_args); + + /* lazy umount hugepages */ + umount2(DEFAULT_HUGE_DIR, MNT_DETACH); + + if (ret < 0) + return clib_error_return (0, "rte_eal_init returned %d", ret); + + /* main thread 1st */ + error = vlib_buffer_pool_create(vm, dm->num_mbufs, MBUF_SIZE, rte_socket_id()); + if (error) + return error; + + for (i = 0; i < RTE_MAX_LCORE; i++) + { + error = vlib_buffer_pool_create(vm, dm->num_mbufs, MBUF_SIZE, + rte_lcore_to_socket_id(i)); + if (error) + return error; + } + + if (dm->use_rss) + { + vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, dpdk_input_node.index); + rt->function = dpdk_input_rss; + } + done: + return error; +} + +VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); + +void dpdk_update_link_state (dpdk_device_t * xd, f64 now) +{ + vnet_main_t * vnm = vnet_get_main(); + struct rte_eth_link prev_link = xd->link; + u32 hw_flags = 0; + u8 hw_flags_chg = 0; + + /* only update link state for PMD interfaces */ + if (xd->dev_type != VNET_DPDK_DEV_ETH) + return; + + xd->time_last_link_update = now ? now : xd->time_last_link_update; + memset(&xd->link, 0, sizeof(xd->link)); + rte_eth_link_get_nowait (xd->device_index, &xd->link); + + if (LINK_STATE_ELOGS) + { + vlib_main_t * vm = vlib_get_main(); + ELOG_TYPE_DECLARE(e) = { + .format = + "update-link-state: sw_if_index %d, admin_up %d," + "old link_state %d new link_state %d", + .format_args = "i4i1i1i1", + }; + + struct { u32 sw_if_index; u8 admin_up; + u8 old_link_state; u8 new_link_state;} *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->admin_up = xd->admin_up; + ed->old_link_state = (u8) + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); + ed->new_link_state = (u8) xd->link.link_status; + } + + if ((xd->admin_up == 1) && + ((xd->link.link_status != 0) ^ + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) + { + hw_flags_chg = 1; + hw_flags |= (xd->link.link_status ? 
+ VNET_HW_INTERFACE_FLAG_LINK_UP: 0); + } + + if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) + { + hw_flags_chg = 1; + switch (xd->link.link_duplex) + { + case ETH_LINK_HALF_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; + break; + case ETH_LINK_FULL_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; + break; + default: + break; + } + } + if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) + { + hw_flags_chg = 1; + switch (xd->link.link_speed) + { + case ETH_LINK_SPEED_10: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; + break; + case ETH_LINK_SPEED_100: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; + break; + case ETH_LINK_SPEED_1000: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; + break; + case ETH_LINK_SPEED_10000: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; + break; + case ETH_LINK_SPEED_40G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; + break; + case 0: + break; + default: + clib_warning("unknown link speed %d", xd->link.link_speed); + break; + } + } + if (hw_flags_chg) + { + if (LINK_STATE_ELOGS) + { + vlib_main_t * vm = vlib_get_main(); + + ELOG_TYPE_DECLARE(e) = { + .format = "update-link-state: sw_if_index %d, new flags %d", + .format_args = "i4i4", + }; + + struct { u32 sw_if_index; u32 flags; } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->flags = hw_flags; + } + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); + } +} + +static uword +dpdk_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + clib_error_t * error; + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + vlib_thread_main_t * tm = vlib_get_thread_main(); + void *vu_state; + int i; + + error = dpdk_lib_init (dm); + + /* + * Turn on the input node if we found some devices to drive + * and we're not running worker threads or i/o threads + */ + + if (error == 0 && vec_len(dm->devices) > 0) + { + if (tm->n_vlib_mains == 1) + vlib_node_set_state (vm, dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + else if (tm->main_thread_is_io_node) + vlib_node_set_state (vm, dpdk_io_input_node.index, + VLIB_NODE_STATE_POLLING); + else if (!dm->have_io_threads) + for (i=0; i < tm->n_vlib_mains; i++) + if (vec_len(dm->devices_by_cpu[i]) > 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + if (error) + clib_error_report (error); + + dpdk_vhost_user_process_init(&vu_state); + + dm->io_thread_release = 1; + + f64 now = vlib_time_now (vm); + vec_foreach (xd, dm->devices) + { + dpdk_update_link_state (xd, now); + } + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, 5.0); + + if (dpdk_get_admin_up_down_in_progress()) + /* skip the poll if an admin up down is in progress (on any interface) */ + continue; + + vec_foreach (xd, dm->devices) + { + f64 now = vlib_time_now (vm); + if ((now - xd->time_last_stats_update) >= DPDK_STATS_POLL_INTERVAL) + dpdk_update_counters (xd, now); + if ((now - xd->time_last_link_update) >= DPDK_LINK_POLL_INTERVAL) + dpdk_update_link_state (xd, now); + + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + if (dpdk_vhost_user_process_if(vm, xd, vu_state) != 0) + continue; + } + } + + dpdk_vhost_user_process_cleanup(vu_state); + + return 0; +} + +VLIB_REGISTER_NODE (dpdk_process_node,static) = { + .function = dpdk_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-process", + .process_log2_n_stack_bytes = 17, +}; + +clib_error_t * +dpdk_init (vlib_main_t * vm) +{ + dpdk_main_t * dm = &dpdk_main; + 
vlib_node_t * ei; + clib_error_t * error = 0; + vlib_thread_main_t * tm = vlib_get_thread_main(); + + /* verify that structs are cacheline aligned */ + ASSERT(offsetof(dpdk_device_t, cacheline0) == 0); + ASSERT(offsetof(dpdk_device_t, cacheline1) == CLIB_CACHE_LINE_BYTES); + ASSERT(offsetof(dpdk_worker_t, cacheline0) == 0); + ASSERT(offsetof(frame_queue_trace_t, cacheline0) == 0); + + /* Add references to DPDK Driver Constructor functions to get the dynamic + * loader to pull in the driver library & run the constructors. + */ +#define _(d) \ +do { \ + void devinitfn_ ##d(void); \ + __attribute__((unused)) void (* volatile pf)(void); \ + pf = devinitfn_ ##d; \ +} while(0); + +#ifdef RTE_LIBRTE_EM_PMD + _(em_pmd_drv) +#endif + +#ifdef RTE_LIBRTE_IGB_PMD + _(pmd_igb_drv) +#endif + +#ifdef RTE_LIBRTE_IXGBE_PMD + _(rte_ixgbe_driver) +#endif + +#ifdef RTE_LIBRTE_I40E_PMD + _(rte_i40e_driver) + _(rte_i40evf_driver) +#endif + +#ifdef RTE_LIBRTE_FM10K_PMD + _(rte_fm10k_driver) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_PMD + _(rte_virtio_driver) +#endif + +#ifdef RTE_LIBRTE_VMXNET3_PMD + _(rte_vmxnet3_driver) +#endif + +#ifdef RTE_LIBRTE_VICE_PMD + _(rte_vice_driver) +#endif + +#ifdef RTE_LIBRTE_ENIC_PMD + _(rte_enic_driver) +#endif + +#ifdef RTE_LIBRTE_PMD_AF_PACKET + _(pmd_af_packet_drv) +#endif + +#undef _ + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main(); + + ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); + if (ei == 0) + return clib_error_return (0, "ethernet-input node AWOL"); + + dm->ethernet_input_node_index = ei->index; + + dm->nchannels = 4; + dm->num_mbufs = dm->num_mbufs ? dm->num_mbufs : NB_MBUF; + vec_add1 (dm->eal_init_args, (u8 *) "vnet"); + + dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); + + /* $$$ use n_thread_stacks since it's known-good at this point */ + vec_validate (dm->recycle, tm->n_thread_stacks - 1); + + /* initialize EFD (early fast discard) default settings */ + dm->efd.enabled = DPDK_EFD_DISABLED; + dm->efd.queue_hi_thresh = ((DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT * + DPDK_NB_RX_DESC_10GE)/100); + dm->efd.consec_full_frames_hi_thresh = + DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH; + + /* vhost-user coalescence frames defaults */ + dm->vhost_coalesce_frames = 32; + dm->vhost_coalesce_time = 1e-3; + + /* init CLI */ + if ((error = vlib_call_init_function (vm, dpdk_cli_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_init); + diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c new file mode 100644 index 00000000000..fde0eb23e14 --- /dev/null +++ b/vnet/vnet/devices/dpdk/node.c @@ -0,0 +1,2010 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/xxhash.h> + +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/dpdk/dpdk.h> +#include <vnet/classify/vnet_classify.h> +#include <vnet/mpls-gre/packet.h> + +#include "dpdk_priv.h" + +#ifndef MAX +#define MAX(a,b) ((a) < (b) ? (b) : (a)) +#endif + +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +/* + * At least in certain versions of ESXi, vmware e1000's don't honor the + * "strip rx CRC" bit. Set this flag to work around that bug FOR UNIT TEST ONLY. + * + * If wireshark complains like so: + * + * "Frame check sequence: 0x00000000 [incorrect, should be <hex-num>]" + * and you're using ESXi emulated e1000's, set this flag FOR UNIT TEST ONLY. + * + * Note: do NOT check in this file with this workaround enabled! You'll lose + * actual data from e.g. 10xGE interfaces. The extra 4 bytes annoy + * wireshark, but they're harmless... + */ +#define VMWARE_LENGTH_BUG_WORKAROUND 0 + +typedef struct { + u32 cached_next_index; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} handoff_dispatch_main_t; + +typedef struct { + u32 buffer_index; + u32 next_index; + u32 sw_if_index; +} handoff_dispatch_trace_t; + +/* packet trace format function */ +static u8 * format_handoff_dispatch_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + handoff_dispatch_trace_t * t = va_arg (*args, handoff_dispatch_trace_t *); + + s = format (s, "HANDOFF_DISPATCH: sw_if_index %d next_index %d buffer 0x%x", + t->sw_if_index, + t->next_index, + t->buffer_index); + return s; +} + +handoff_dispatch_main_t handoff_dispatch_main; + +vlib_node_registration_t handoff_dispatch_node; + +#define foreach_handoff_dispatch_error \ +_(EXAMPLE, "example packets") + +typedef enum { +#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym, + foreach_handoff_dispatch_error +#undef _ + HANDOFF_DISPATCH_N_ERROR, +} handoff_dispatch_error_t; + +static char * handoff_dispatch_error_strings[] = { +#define _(sym,string) string, + foreach_handoff_dispatch_error +#undef _ +}; + +static inline +void vlib_put_handoff_queue_elt (vlib_frame_queue_elt_t * hf) +{ + CLIB_MEMORY_BARRIER(); + hf->valid = 1; +} + +static uword +handoff_dispatch_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + dpdk_rx_next_t next_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. 
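+ Loading the headers of buffers 2 and 3 now, while buffers 0 and 1 are
+ handled, hides memory latency across iterations of this dual-buffer loop.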
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + next0 = vnet_buffer(b0)->io_handoff.next_index; + next1 = vnet_buffer(b1)->io_handoff.next_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->buffer_index = bi0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->buffer_index = bi1; + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + next0 = vnet_buffer(b0)->io_handoff.next_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->buffer_index = bi0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (handoff_dispatch_node) = { + .function = handoff_dispatch_node_fn, + .name = "handoff-dispatch", + .vector_size = sizeof (u32), + .format_trace = format_handoff_dispatch_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .flags = VLIB_NODE_FLAG_IS_HANDOFF, + + .n_errors = ARRAY_LEN(handoff_dispatch_error_strings), + .error_strings = handoff_dispatch_error_strings, + + .n_next_nodes = DPDK_RX_N_NEXT, + + .next_nodes = { + [DPDK_RX_NEXT_DROP] = "error-drop", + [DPDK_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", + [DPDK_RX_NEXT_IP4_INPUT] = "ip4-input", + [DPDK_RX_NEXT_IP6_INPUT] = "ip6-input", + [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-gre-input", + }, +}; + +clib_error_t *handoff_dispatch_init (vlib_main_t *vm) +{ + handoff_dispatch_main_t * mp = &handoff_dispatch_main; + + mp->vlib_main = vm; + mp->vnet_main = &vnet_main; + + return 0; +} + +VLIB_INIT_FUNCTION (handoff_dispatch_init); + +u32 dpdk_get_handoff_node_index (void) +{ + return handoff_dispatch_node.index; +} + +static char * dpdk_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_error +#undef _ +}; + +typedef 
struct { + u32 buffer_index; + u16 device_index; + u16 queue_index; + struct rte_mbuf mb; + vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */ +} dpdk_rx_dma_trace_t; + +static u8 * format_dpdk_rx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main(); + dpdk_rx_dma_trace_t * t = va_arg (*va, dpdk_rx_dma_trace_t *); + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = vec_elt_at_index (dm->devices, t->device_index); + format_function_t * f; + uword indent = format_get_indent (s); + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U rx queue %d", + format_vnet_sw_interface_name, vnm, sw, + t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, + format_vlib_buffer, &t->buffer); + +#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS + s = format (s, "\n%U%U", + format_white_space, indent, + format_dpdk_rx_rte_mbuf, &t->mb); +#else + s = format (s, "\n%U%U", + format_white_space, indent, + format_dpdk_rte_mbuf, &t->mb); +#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ + f = node->format_buffer; + if (!f) + f = format_hex_bytes; + s = format (s, "\n%U%U", format_white_space, indent, + f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + +always_inline void +dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t *xd, struct rte_mbuf *mb, + vlib_buffer_t *b0, + u8 * next0, u8 * error0) +{ + u8 is0_ip4, is0_ip6, is0_mpls, n0; + uint16_t mb_flags = mb->ol_flags; + + if (PREDICT_FALSE(mb_flags & ( +#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS + PKT_EXT_RX_PKT_ERROR | PKT_EXT_RX_BAD_FCS | +#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD + ))) + { + /* some error was flagged. determine the drop reason */ + n0 = DPDK_RX_NEXT_DROP; + *error0 = +#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS + (mb_flags & PKT_EXT_RX_PKT_ERROR) ? DPDK_ERROR_RX_PACKET_ERROR : + (mb_flags & PKT_EXT_RX_BAD_FCS) ? DPDK_ERROR_RX_BAD_FCS : +#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ + (mb_flags & PKT_RX_IP_CKSUM_BAD) ? DPDK_ERROR_IP_CHECKSUM_ERROR : + (mb_flags & PKT_RX_L4_CKSUM_BAD) ? DPDK_ERROR_L4_CHECKSUM_ERROR : + DPDK_ERROR_NONE; + } + else + { + *error0 = DPDK_ERROR_NONE; + if (xd->per_interface_next_index != ~0) + n0 = xd->per_interface_next_index; + else if (mb_flags & PKT_RX_VLAN_PKT) + n0 = DPDK_RX_NEXT_ETHERNET_INPUT; + else + { + n0 = DPDK_RX_NEXT_ETHERNET_INPUT; +#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0) + is0_ip4 = (mb->packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV4_EXT)) != 0; +#else + is0_ip4 = (mb_flags & (PKT_RX_IPV4_HDR | PKT_RX_IPV4_HDR_EXT)) != 0; +#endif + + if (PREDICT_TRUE(is0_ip4)) + n0 = DPDK_RX_NEXT_IP4_INPUT; + else + { +#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0) + is0_ip6 = + (mb->packet_type & (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L3_IPV6_EXT)) != 0; +#else + is0_ip6 = + (mb_flags & (PKT_RX_IPV6_HDR | PKT_RX_IPV6_HDR_EXT)) != 0; +#endif + if (PREDICT_TRUE(is0_ip6)) + n0 = DPDK_RX_NEXT_IP6_INPUT; + else + { + ethernet_header_t *h0 = (ethernet_header_t *) b0->data; + is0_mpls = (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)); + n0 = is0_mpls ? 
DPDK_RX_NEXT_MPLS_INPUT : n0; + } + } + } + } + *next0 = n0; +} + +void dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, + u32 * buffers, + uword n_buffers) +{ + vlib_main_t * vm = vlib_get_main(); + u32 * b, n_left; + u8 next0; + + n_left = n_buffers; + b = buffers; + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + dpdk_rx_dma_trace_t * t0; + struct rte_mbuf *mb; + u8 error0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + mb = ((struct rte_mbuf *)b0) - 1; + dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, + &next0, &error0); + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = bi0; + + memcpy (&t0->mb, mb, sizeof (t0->mb)); + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data, sizeof (t0->buffer.pre_data)); + +#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS + /* + * Clear overloaded TX offload flags when a DPDK driver + * is using them for RX flags (e.g. Cisco VIC Ethernet driver) + */ + mb->ol_flags &= PKT_EXT_RX_CLR_TX_FLAGS_MASK; +#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ + + b += 1; + } +} + +/* + * dpdk_efd_update_counters() + * Update EFD (early-fast-discard) counters + */ +void dpdk_efd_update_counters (dpdk_device_t *xd, + u32 n_buffers, + u16 enabled) +{ + if (enabled & DPDK_EFD_MONITOR_ENABLED) + { + u64 now = clib_cpu_time_now(); + if (xd->efd_agent.last_poll_time > 0) + { + u64 elapsed_time = (now - xd->efd_agent.last_poll_time); + if (elapsed_time > xd->efd_agent.max_poll_delay) + xd->efd_agent.max_poll_delay = elapsed_time; + } + xd->efd_agent.last_poll_time = now; + } + + xd->efd_agent.total_packet_cnt += n_buffers; + xd->efd_agent.last_burst_sz = n_buffers; + + if (n_buffers > xd->efd_agent.max_burst_sz) + xd->efd_agent.max_burst_sz = n_buffers; + + if (PREDICT_FALSE(n_buffers == VLIB_FRAME_SIZE)) + { + xd->efd_agent.full_frames_cnt++; + xd->efd_agent.consec_full_frames_cnt++; + } + else + { + xd->efd_agent.consec_full_frames_cnt = 0; + } +} + +/* is_efd_discardable() + * returns non zero DPDK error if packet meets early-fast-discard criteria, + * zero otherwise + */ +u32 is_efd_discardable (vlib_thread_main_t *tm, + vlib_buffer_t * b0, + struct rte_mbuf *mb) +{ + ethernet_header_t *eh = (ethernet_header_t *) b0->data; + + if (eh->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4)) + { + ip4_header_t *ipv4 = + (ip4_header_t *)&(b0->data[sizeof(ethernet_header_t)]); + u8 pkt_prec = (ipv4->tos >> 5); + + return (tm->efd.ip_prec_bitmap & (1 << pkt_prec) ? + DPDK_ERROR_IPV4_EFD_DROP_PKTS : DPDK_ERROR_NONE); + } + else if (eh->type == clib_net_to_host_u16(ETHERNET_TYPE_IP6)) + { + ip6_header_t *ipv6 = + (ip6_header_t *)&(b0->data[sizeof(ethernet_header_t)]); + u8 pkt_tclass = + ((ipv6->ip_version_traffic_class_and_flow_label >> 20) & 0xff); + + return (tm->efd.ip_prec_bitmap & (1 << pkt_tclass) ? + DPDK_ERROR_IPV6_EFD_DROP_PKTS : DPDK_ERROR_NONE); + } + else if (eh->type == clib_net_to_host_u16(ETHERNET_TYPE_MPLS_UNICAST)) + { + mpls_unicast_header_t *mpls = + (mpls_unicast_header_t *)&(b0->data[sizeof(ethernet_header_t)]); + u8 pkt_exp = ((mpls->label_exp_s_ttl >> 9) & 0x07); + + return (tm->efd.mpls_exp_bitmap & (1 << pkt_exp) ? 
+ DPDK_ERROR_MPLS_EFD_DROP_PKTS : DPDK_ERROR_NONE); + } + else if ((eh->type == clib_net_to_host_u16(ETHERNET_TYPE_VLAN)) || + (eh->type == clib_net_to_host_u16(ETHERNET_TYPE_DOT1AD))) + { + ethernet_vlan_header_t *vlan = + (ethernet_vlan_header_t *)&(b0->data[sizeof(ethernet_header_t)]); + u8 pkt_cos = ((vlan->priority_cfi_and_id >> 13) & 0x07); + + return (tm->efd.vlan_cos_bitmap & (1 << pkt_cos) ? + DPDK_ERROR_VLAN_EFD_DROP_PKTS : DPDK_ERROR_NONE); + } + + return DPDK_ERROR_NONE; +} + +/* + * This function is used when there are no worker threads. + * The main thread performs IO and forwards the packets. + */ +static inline u32 dpdk_device_input ( dpdk_main_t * dm, + dpdk_device_t * xd, + vlib_node_runtime_t * node, + u32 cpu_index, + u16 queue_id) +{ + u32 n_buffers; + u32 next_index = DPDK_RX_NEXT_ETHERNET_INPUT; + u32 n_left_to_next, * to_next; + u32 mb_index; + vlib_main_t * vm = vlib_get_main(); + uword n_rx_bytes = 0; + u32 n_trace, trace_cnt __attribute__((unused)); + vlib_buffer_free_list_t * fl; + u8 efd_discard_burst = 0; + + if (xd->admin_up == 0) + return 0; + + n_buffers = dpdk_rx_burst(dm, xd, queue_id); + + if (n_buffers == 0) + { + /* check if EFD (dpdk) is enabled */ + if (PREDICT_FALSE(dm->efd.enabled)) + { + /* reset a few stats */ + xd->efd_agent.last_poll_time = 0; + xd->efd_agent.last_burst_sz = 0; + } + return 0; + } + + vec_reset_length (xd->d_trace_buffers); + trace_cnt = n_trace = vlib_get_trace_count (vm, node); + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + /* + * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * therefore fake the stop in the dpdk driver by + * silently dropping all of the incoming pkts instead of + * stopping the driver / hardware. + */ + if (PREDICT_FALSE(xd->admin_up != 1)) + { + for (mb_index = 0; mb_index < n_buffers; mb_index++) + rte_pktmbuf_free (xd->rx_vectors[queue_id][mb_index]); + + return 0; + } + + /* Check for congestion if EFD (Early-Fast-Discard) is enabled + * in any mode (e.g. 
dpdk, monitor, or drop_all) + */ + if (PREDICT_FALSE(dm->efd.enabled)) + { + /* update EFD counters */ + dpdk_efd_update_counters(xd, n_buffers, dm->efd.enabled); + + if (PREDICT_FALSE(dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED)) + { + /* discard all received packets */ + for (mb_index = 0; mb_index < n_buffers; mb_index++) + rte_pktmbuf_free(xd->rx_vectors[queue_id][mb_index]); + + xd->efd_agent.discard_cnt += n_buffers; + increment_efd_drop_counter(vm, + DPDK_ERROR_VLAN_EFD_DROP_PKTS, + n_buffers); + + return 0; + } + + if (PREDICT_FALSE(xd->efd_agent.consec_full_frames_cnt >= + dm->efd.consec_full_frames_hi_thresh)) + { + u32 device_queue_sz = rte_eth_rx_queue_count(xd->device_index, + queue_id); + if (device_queue_sz >= dm->efd.queue_hi_thresh) + { + /* dpdk device queue has reached the critical threshold */ + xd->efd_agent.congestion_cnt++; + + /* apply EFD to packets from the burst */ + efd_discard_burst = 1; + } + } + } + + mb_index = 0; + + while (n_buffers > 0) + { + u32 bi0; + u8 next0, error0; + u32 l3_offset0; + vlib_buffer_t * b0, * b_seg, * b_chain = 0; + u32 cntr_type; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_buffers > 0 && n_left_to_next > 0) + { + u8 nb_seg = 1; + struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index]; + struct rte_mbuf *mb_seg = mb->next; + + if (PREDICT_TRUE(n_buffers > 2)) + { + struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index+2]; + vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1); + CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); + } + + ASSERT(mb); + + b0 = (vlib_buffer_t *)(mb+1); + + /* check whether EFD is looking for packets to discard */ + if (PREDICT_FALSE(efd_discard_burst)) + { + vlib_thread_main_t * tm = vlib_get_thread_main(); + + if (PREDICT_TRUE(cntr_type = is_efd_discardable(tm, b0, mb))) + { + rte_pktmbuf_free(mb); + xd->efd_agent.discard_cnt++; + increment_efd_drop_counter(vm, + cntr_type, + 1); + n_buffers--; + mb_index++; + continue; + } + } + + /* Prefetch one next segment if it exists. */ + if (PREDICT_FALSE(mb->nb_segs > 1)) + { + struct rte_mbuf *pfmb = mb->next; + vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1); + CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); + b_chain = b0; + } + + vlib_buffer_init_for_free_list (b0, fl); + b0->clone_count = 0; + + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next++; + n_left_to_next--; + + dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, + &next0, &error0); +#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS + /* + * Clear overloaded TX offload flags when a DPDK driver + * is using them for RX flags (e.g. Cisco VIC Ethernet driver) + */ + + if (PREDICT_TRUE(trace_cnt == 0)) + mb->ol_flags &= PKT_EXT_RX_CLR_TX_FLAGS_MASK; + else + trace_cnt--; +#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ + + b0->error = node->errors[error0]; + + l3_offset0 = ((next0 == DPDK_RX_NEXT_IP4_INPUT || + next0 == DPDK_RX_NEXT_IP6_INPUT || + next0 == DPDK_RX_NEXT_MPLS_INPUT) ? 
+ sizeof (ethernet_header_t) : 0); + + b0->current_data = l3_offset0; + b0->current_length = mb->data_len - l3_offset0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + + if (VMWARE_LENGTH_BUG_WORKAROUND) + b0->current_length -= 4; + + vnet_buffer(b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0; + n_rx_bytes += mb->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + { + ASSERT(mb_seg != 0); + + b_seg = (vlib_buffer_t *)(mb_seg+1); + vlib_buffer_init_for_free_list (b_seg, fl); + b_seg->clone_count = 0; + + ASSERT((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT(b_seg->current_data == 0); + + /* + * The driver (e.g. virtio) may not put the packet data at the start + * of the segment, so don't assume b_seg->current_data == 0 is correct. + */ + b_seg->current_data = (mb_seg->buf_addr + mb_seg->data_off) - (void *)b_seg->data; + + b_seg->current_length = mb_seg->data_len; + b0->total_length_not_including_first_buffer += + mb_seg->data_len; + + b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; + b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); + + b_chain = b_seg; + mb_seg = mb_seg->next; + nb_seg++; + } + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b0); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + if (PREDICT_FALSE (n_trace > mb_index)) + vec_add1 (xd->d_trace_buffers, bi0); + n_buffers--; + mb_index++; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers) > 0)) + { + dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers, + vec_len (xd->d_trace_buffers)); + vlib_set_trace_count (vm, node, n_trace - vec_len (xd->d_trace_buffers)); + } + + vlib_increment_combined_counter + (vnet_get_main()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + xd->vlib_sw_if_index, + mb_index, n_rx_bytes); + + dpdk_worker_t * dw = vec_elt_at_index(dm->workers, cpu_index); + dw->aggregate_rx_packets += mb_index; + + return mb_index; +} + +#if VIRL > 0 +#define VIRL_SPEED_LIMIT() \ + /* Limit the input rate to 1000 vectors / sec */ \ + { \ + struct timespec ts, tsrem; \ + \ + ts.tv_sec = 0; \ + ts.tv_nsec = 1000*1000; /* 1ms */ \ + \ + while (nanosleep(&ts, &tsrem) < 0) \ + { \ + ts = tsrem; \ + } \ + } +#else +#define VIRL_SPEED_LIMIT() +#endif + + +static uword +dpdk_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + uword n_rx_packets = 0; + dpdk_device_and_queue_t * dq; + u32 cpu_index = os_get_cpu_number(); + + /* + * Poll all devices on this cpu for input/interrupts. + */ + vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + { + xd = vec_elt_at_index(dm->devices, dq->device); + ASSERT(dq->queue_id == 0); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0); + } + + VIRL_SPEED_LIMIT() + + return n_rx_packets; +} + +uword +dpdk_input_rss (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + uword n_rx_packets = 0; + dpdk_device_and_queue_t * dq; + u32 cpu_index = os_get_cpu_number(); + + /* + * Poll all devices on this cpu for input/interrupts. 
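+ * Unlike dpdk-input, each device-and-queue entry supplies its own rx queue
+ * id, so a device may be polled on more than one queue from this cpu.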
+ */ + vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + { + xd = vec_elt_at_index(dm->devices, dq->device); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); + } + + VIRL_SPEED_LIMIT() + + return n_rx_packets; +} + +VLIB_REGISTER_NODE (dpdk_input_node) = { + .function = dpdk_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "dpdk-input", + + /* Will be enabled if/when hardware is detected. */ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_dpdk_rx_dma_trace, + + .n_errors = DPDK_N_ERROR, + .error_strings = dpdk_error_strings, + + .n_next_nodes = DPDK_RX_N_NEXT, + .next_nodes = { + [DPDK_RX_NEXT_DROP] = "error-drop", + [DPDK_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", + [DPDK_RX_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [DPDK_RX_NEXT_IP6_INPUT] = "ip6-input", + [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-gre-input", + }, +}; + +/* + * Override the next nodes for the dpdk input nodes. + * Must be invoked prior to VLIB_INIT_FUNCTION calls. + */ +void dpdk_set_next_node (dpdk_rx_next_t next, char *name) +{ + vlib_node_registration_t *r = &dpdk_input_node; + vlib_node_registration_t *r_io = &dpdk_io_input_node; + vlib_node_registration_t *r_handoff = &handoff_dispatch_node; + + switch (next) + { + case DPDK_RX_NEXT_IP4_INPUT: + case DPDK_RX_NEXT_IP6_INPUT: + case DPDK_RX_NEXT_MPLS_INPUT: + case DPDK_RX_NEXT_ETHERNET_INPUT: + r->next_nodes[next] = name; + r_io->next_nodes[next] = name; + r_handoff->next_nodes[next] = name; + break; + + default: + clib_warning ("%s: illegal next %d\n", __FUNCTION__, next); + break; + } +} + +inline vlib_frame_queue_elt_t * +vlib_get_handoff_queue_elt (u32 vlib_worker_index) +{ + vlib_frame_queue_t *fq; + vlib_frame_queue_elt_t *elt; + u64 new_tail; + + fq = vlib_frame_queues[vlib_worker_index]; + ASSERT (fq); + + new_tail = __sync_add_and_fetch (&fq->tail, 1); + + /* Wait until a ring slot is available */ + while (new_tail >= fq->head_hint + fq->nelts) + vlib_worker_thread_barrier_check (); + + elt = fq->elts + (new_tail & (fq->nelts-1)); + + /* this would be very bad... 
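+ it would mean the frame that last occupied this ring slot has not been
+ consumed yet, so spin until the consumer clears elt->valid.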
*/ + while (elt->valid) + ; + + elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME; + elt->last_n_vectors = elt->n_vectors = 0; + + return elt; +} + +inline vlib_frame_queue_elt_t * +dpdk_get_handoff_queue_elt ( + u32 vlib_worker_index, + vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index) +{ + vlib_frame_queue_elt_t *elt; + + if (handoff_queue_elt_by_worker_index [vlib_worker_index]) + return handoff_queue_elt_by_worker_index [vlib_worker_index]; + + elt = vlib_get_handoff_queue_elt (vlib_worker_index); + + handoff_queue_elt_by_worker_index [vlib_worker_index] = elt; + + return elt; +} + +static inline vlib_frame_queue_t * +is_vlib_handoff_queue_congested ( + u32 vlib_worker_index, + u32 queue_hi_thresh, + vlib_frame_queue_t ** handoff_queue_by_worker_index) +{ + vlib_frame_queue_t *fq; + + fq = handoff_queue_by_worker_index [vlib_worker_index]; + if (fq != (vlib_frame_queue_t *)(~0)) + return fq; + + fq = vlib_frame_queues[vlib_worker_index]; + ASSERT (fq); + + if (PREDICT_FALSE(fq->tail >= (fq->head_hint + queue_hi_thresh))) { + /* a valid entry in the array will indicate the queue has reached + * the specified threshold and is congested + */ + handoff_queue_by_worker_index [vlib_worker_index] = fq; + fq->enqueue_full_events++; + return fq; + } + + return NULL; +} + +static inline u64 ipv4_get_key (ip4_header_t *ip) +{ + u64 hash_key; + + hash_key = *((u64*)(&ip->address_pair)) ^ ip->protocol; + + return hash_key; +} + +static inline u64 ipv6_get_key (ip6_header_t *ip) +{ + u64 hash_key; + + hash_key = ip->src_address.as_u64[0] ^ + ip->src_address.as_u64[1] ^ + ip->dst_address.as_u64[0] ^ + ip->dst_address.as_u64[1] ^ + ip->protocol; + + return hash_key; +} + + +#define MPLS_BOTTOM_OF_STACK_BIT_MASK 0x00000100U +#define MPLS_LABEL_MASK 0xFFFFF000U + +static inline u64 mpls_get_key (mpls_unicast_header_t *m) +{ + u64 hash_key; + u8 ip_ver; + + + /* find the bottom of the MPLS label stack. 
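+ The S (bottom-of-stack) bit is bit 8 of each 32-bit label word, hence the
+ test against MPLS_BOTTOM_OF_STACK_BIT_MASK swapped to network order; at
+ most five labels are examined before falling back to the last label seen.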
*/
+ if (PREDICT_TRUE(m->label_exp_s_ttl &
+ clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (PREDICT_TRUE(m->label_exp_s_ttl &
+ clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+ goto bottom_lbl_found;
+ }
+
+ /* the bottom label was not found - use the last label */
+ hash_key = m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK);
+
+ return hash_key;
+
+
+bottom_lbl_found:
+ m++;
+ ip_ver = (*((u8 *)m) >> 4);
+
+ /* find out whether it is an IPv4 or an IPv6 header */
+ if (PREDICT_TRUE(ip_ver == 4)) {
+ hash_key = ipv4_get_key((ip4_header_t *)m);
+ } else if (PREDICT_TRUE(ip_ver == 6)) {
+ hash_key = ipv6_get_key((ip6_header_t *)m);
+ } else {
+ /* use the bottom label */
+ hash_key = (m-1)->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK);
+ }
+
+ return hash_key;
+
+}
+
+static inline u64 eth_get_key (ethernet_header_t *h0)
+{
+ u64 hash_key;
+
+
+ if (PREDICT_TRUE(h0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))) {
+ hash_key = ipv4_get_key((ip4_header_t *)(h0+1));
+ } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP6)) {
+ hash_key = ipv6_get_key((ip6_header_t *)(h0+1));
+ } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) {
+ hash_key = mpls_get_key((mpls_unicast_header_t *)(h0+1));
+ } else if ((h0->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ||
+ (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_DOT1AD))) {
+ ethernet_vlan_header_t * outer = (ethernet_vlan_header_t *)(h0 + 1);
+
+ outer = (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ?
+ outer+1 : outer;
+ if (PREDICT_TRUE(outer->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))) {
+ hash_key = ipv4_get_key((ip4_header_t *)(outer+1));
+ } else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)) {
+ hash_key = ipv6_get_key((ip6_header_t *)(outer+1));
+ } else if (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) {
+ hash_key = mpls_get_key((mpls_unicast_header_t *)(outer+1));
+ } else {
+ hash_key = outer->type;
+ }
+ } else {
+ hash_key = 0;
+ }
+
+ return hash_key;
+}
+
+/*
+ * This function is used when dedicated IO threads feed the worker threads.
+ *
+ * Devices are allocated to this thread based on instances and instance_id.
+ * If instances==0 then the function automatically determines the number
+ * of instances of this thread, and allocates devices among them.
+ * If instances != 0, then instance_id must be in the range 0..instances-1.
+ * The function allocates devices among the specified number of instances,
+ * with this thread having the given instance id. This option is used for
+ * splitting devices among differently named "io"-type threads.
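+ *
+ * A purely illustrative example: with instances == 2 and six devices,
+ * instance 0 polls devices 0, 2 and 4 while instance 1 polls devices 1, 3
+ * and 5, since a device is claimed when its index modulo the instance
+ * count equals instance_id.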
+ */ +void dpdk_io_thread (vlib_worker_thread_t * w, + u32 instances, + u32 instance_id, + char *worker_name, + dpdk_io_thread_callback_t callback) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_thread_main_t * tm = vlib_get_thread_main(); + vlib_thread_registration_t * tr; + dpdk_main_t * dm = &dpdk_main; + char *io_name = w->registration->name; + dpdk_device_t * xd; + dpdk_device_t ** my_devices = 0; + vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index = 0; + vlib_frame_queue_t ** congested_handoff_queue_by_worker_index = 0; + vlib_frame_queue_elt_t * hf = 0; + int i; + u32 n_left_to_next_worker = 0, * to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 cpu_index = os_get_cpu_number(); + u32 num_workers = 0; + u32 num_devices = 0; + uword * p; + u16 queue_id = 0; + vlib_node_runtime_t * node_trace; + u32 first_worker_index = 0; + + /* Wait until the dpdk init sequence is complete */ + while (dm->io_thread_release == 0) + vlib_worker_thread_barrier_check(); + + clib_time_init (&vm->clib_time); + + p = hash_get_mem (tm->thread_registrations_by_name, worker_name); + ASSERT (p); + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + num_workers = tr->count; + first_worker_index = tr->first_index; + } + + /* Allocate devices to this thread */ + if (instances == 0) + { + /* auto-assign */ + instance_id = w->instance_id; + + p = hash_get_mem (tm->thread_registrations_by_name, io_name); + tr = (vlib_thread_registration_t *) p[0]; + /* Otherwise, how did we get here */ + ASSERT (tr && tr->count); + instances = tr->count; + } + else + { + /* manually assign */ + ASSERT (instance_id < instances); + } + + vec_validate (handoff_queue_elt_by_worker_index, + first_worker_index + num_workers - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + first_worker_index + num_workers - 1, + (vlib_frame_queue_t *)(~0)); + + /* packet tracing is triggered on the dpdk-input node for ease-of-use */ + node_trace = vlib_node_get_runtime (vm, dpdk_input_node.index); + + /* And handle them... */ + while (1) + { + u32 n_buffers; + u32 mb_index; + uword n_rx_bytes = 0; + u32 n_trace, trace_cnt __attribute__((unused)); + vlib_buffer_free_list_t * fl; + u32 hash; + u64 hash_key; + u8 efd_discard_burst; + + vlib_worker_thread_barrier_check (); + + /* Invoke callback if supplied */ + if (PREDICT_FALSE(callback != NULL)) + callback(vm); + + if (PREDICT_FALSE(vec_len(dm->devices) != num_devices)) + { + vec_reset_length(my_devices); + vec_foreach (xd, dm->devices) + { + if (((xd - dm->devices) % tr->count) == instance_id) + { + fprintf(stderr, "i/o thread %d (cpu %d) takes port %d\n", + instance_id, (int) os_get_cpu_number(), (int) (xd - dm->devices)); + vec_add1 (my_devices, xd); + } + } + num_devices = vec_len(dm->devices); + } + + for (i = 0; i < vec_len (my_devices); i++) + { + xd = my_devices[i]; + + if (!xd->admin_up) + continue; + + n_buffers = dpdk_rx_burst(dm, xd, 0 /* queue_id */); + + if (n_buffers == 0) + { + /* check if EFD (dpdk) is enabled */ + if (PREDICT_FALSE(dm->efd.enabled)) + { + /* reset a few stats */ + xd->efd_agent.last_poll_time = 0; + xd->efd_agent.last_burst_sz = 0; + } + continue; + } + + vec_reset_length (xd->d_trace_buffers); + trace_cnt = n_trace = vlib_get_trace_count (vm, node_trace); + + /* + * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * therefore fake the stop in the dpdk driver by + * silently dropping all of the incoming pkts instead of + * stopping the driver / hardware. 
+ */ + if (PREDICT_FALSE(xd->admin_up != 1)) + { + for (mb_index = 0; mb_index < n_buffers; mb_index++) + rte_pktmbuf_free (xd->rx_vectors[queue_id][mb_index]); + continue; + } + + /* reset EFD action for the burst */ + efd_discard_burst = 0; + + /* Check for congestion if EFD (Early-Fast-Discard) is enabled + * in any mode (e.g. dpdk, monitor, or drop_all) + */ + if (PREDICT_FALSE(dm->efd.enabled)) + { + /* update EFD counters */ + dpdk_efd_update_counters(xd, n_buffers, dm->efd.enabled); + + if (PREDICT_FALSE(dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED)) + { + /* drop all received packets */ + for (mb_index = 0; mb_index < n_buffers; mb_index++) + rte_pktmbuf_free(xd->rx_vectors[queue_id][mb_index]); + + xd->efd_agent.discard_cnt += n_buffers; + increment_efd_drop_counter(vm, + DPDK_ERROR_VLAN_EFD_DROP_PKTS, + n_buffers); + + continue; + } + + if (PREDICT_FALSE(xd->efd_agent.consec_full_frames_cnt >= + dm->efd.consec_full_frames_hi_thresh)) + { + u32 device_queue_sz = rte_eth_rx_queue_count(xd->device_index, + queue_id); + if (device_queue_sz >= dm->efd.queue_hi_thresh) + { + /* dpdk device queue has reached the critical threshold */ + xd->efd_agent.congestion_cnt++; + + /* apply EFD to packets from the burst */ + efd_discard_burst = 1; + } + } + } + + fl = vlib_buffer_get_free_list + (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + mb_index = 0; + + while (n_buffers > 0) + { + u32 bi0; + u8 next0, error0; + u32 l3_offset0; + vlib_buffer_t * b0, * b_seg, * b_chain = 0; + ethernet_header_t * h0; + u8 nb_seg = 1; + struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index]; + struct rte_mbuf *mb_seg = mb->next; + + if (PREDICT_TRUE(n_buffers > 1)) + { + struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index+2]; + vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1); + CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (bp->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + b0 = (vlib_buffer_t *)(mb+1); + + /* check whether EFD is looking for packets to discard */ + if (PREDICT_FALSE(efd_discard_burst)) + { + u32 cntr_type; + if (PREDICT_TRUE(cntr_type = is_efd_discardable(tm, b0, mb))) + { + rte_pktmbuf_free(mb); + xd->efd_agent.discard_cnt++; + increment_efd_drop_counter(vm, + cntr_type, + 1); + + n_buffers--; + mb_index++; + continue; + } + } + + /* Prefetch one next segment if it exists */ + if (PREDICT_FALSE(mb->nb_segs > 1)) + { + struct rte_mbuf *pfmb = mb->next; + vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1); + CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); + b_chain = b0; + } + + bi0 = vlib_get_buffer_index (vm, b0); + vlib_buffer_init_for_free_list (b0, fl); + b0->clone_count = 0; + + dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, + &next0, &error0); +#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS + /* + * Clear overloaded TX offload flags when a DPDK driver + * is using them for RX flags (e.g. Cisco VIC Ethernet driver) + */ + if (PREDICT_TRUE(trace_cnt == 0)) + mb->ol_flags &= PKT_EXT_RX_CLR_TX_FLAGS_MASK; + else + trace_cnt--; +#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ + + if (error0) + clib_warning ("bi %d error %d", bi0, error0); + + b0->error = 0; + + l3_offset0 = ((next0 == DPDK_RX_NEXT_IP4_INPUT || + next0 == DPDK_RX_NEXT_IP6_INPUT || + next0 == DPDK_RX_NEXT_MPLS_INPUT) ? 
+ sizeof (ethernet_header_t) : 0); + + b0->current_data = l3_offset0; + b0->current_length = mb->data_len - l3_offset0; + + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + + if (VMWARE_LENGTH_BUG_WORKAROUND) + b0->current_length -= 4; + + vnet_buffer(b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0; + vnet_buffer(b0)->io_handoff.next_index = next0; + n_rx_bytes += mb->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + { + ASSERT(mb_seg != 0); + + b_seg = (vlib_buffer_t *)(mb_seg+1); + vlib_buffer_init_for_free_list (b_seg, fl); + b_seg->clone_count = 0; + + ASSERT((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT(b_seg->current_data == 0); + + /* + * The driver (e.g. virtio) may not put the packet data at the start + * of the segment, so don't assume b_seg->current_data == 0 is correct. + */ + b_seg->current_data = (mb_seg->buf_addr + mb_seg->data_off) - (void *)b_seg->data; + + b_seg->current_length = mb_seg->data_len; + b0->total_length_not_including_first_buffer += + mb_seg->data_len; + + b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; + b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); + + b_chain = b_seg; + mb_seg = mb_seg->next; + nb_seg++; + } + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b0); + + if (PREDICT_FALSE (n_trace > mb_index)) + vec_add1 (xd->d_trace_buffers, bi0); + + next_worker_index = first_worker_index; + + /* + * Force unknown traffic onto worker 0, + * and into ethernet-input. $$$$ add more hashes. + */ + h0 = (ethernet_header_t *) b0->data; + + /* Compute ingress LB hash */ + hash_key = eth_get_key(h0); + hash = (u32)clib_xxhash(hash_key); + + if (PREDICT_TRUE (is_pow2(num_workers))) + next_worker_index += hash & (num_workers - 1); + else + next_worker_index += hash % num_workers; + + /* if EFD is enabled and not already discarding from dpdk, + * check the worker ring/queue for congestion + */ + if (PREDICT_FALSE(tm->efd.enabled && !efd_discard_burst)) + { + vlib_frame_queue_t *fq; + + /* fq will be valid if the ring is congested */ + fq = is_vlib_handoff_queue_congested( + next_worker_index, tm->efd.queue_hi_thresh, + congested_handoff_queue_by_worker_index); + + if (PREDICT_FALSE(fq != NULL)) + { + u32 cntr_type; + if (PREDICT_TRUE(cntr_type = + is_efd_discardable(tm, b0, mb))) + { + /* discard the packet */ + fq->enqueue_efd_discards++; + increment_efd_drop_counter(vm, cntr_type, 1); + rte_pktmbuf_free(mb); + n_buffers--; + mb_index++; + continue; + } + } + } + + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = dpdk_get_handoff_queue_elt( + next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_handoff_queue_elt(hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + + n_buffers--; + mb_index++; + } + + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers) > 0)) + { + /* credit the trace to the trace 
node */ + dpdk_rx_trace (dm, node_trace, xd, queue_id, xd->d_trace_buffers, + vec_len (xd->d_trace_buffers)); + vlib_set_trace_count (vm, node_trace, n_trace - vec_len (xd->d_trace_buffers)); + } + + vlib_increment_combined_counter + (vnet_get_main()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + xd->vlib_sw_if_index, + mb_index, n_rx_bytes); + + dpdk_worker_t * dw = vec_elt_at_index(dm->workers, cpu_index); + dw->aggregate_rx_packets += mb_index; + } + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_handoff_queue_elt(hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = (vlib_frame_queue_t *)(~0); + } + hf = 0; + current_worker_index = ~0; + + vlib_increment_main_loop_counter (vm); + } +} + +/* + * This function is used when the main thread performs IO and feeds the + * worker threads. + */ +static uword +dpdk_io_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + vlib_thread_main_t * tm = vlib_get_thread_main(); + uword n_rx_packets = 0; + static vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index; + static vlib_frame_queue_t ** congested_handoff_queue_by_worker_index = 0; + vlib_frame_queue_elt_t * hf = 0; + int i; + u32 n_left_to_next_worker = 0, * to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 cpu_index = os_get_cpu_number(); + static int num_workers_set; + static u32 num_workers; + u16 queue_id = 0; + vlib_node_runtime_t * node_trace; + static u32 first_worker_index; + + if (PREDICT_FALSE(num_workers_set == 0)) + { + uword * p; + vlib_thread_registration_t * tr; + /* Only the standard vnet worker threads are supported */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + num_workers = tr->count; + first_worker_index = tr->first_index; + } + num_workers_set = 1; + } + + if (PREDICT_FALSE(handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + first_worker_index + num_workers - 1, + (vlib_frame_queue_t *)(~0)); + } + + /* packet tracing is triggered on the dpdk-input node for ease-of-use */ + node_trace = vlib_node_get_runtime (vm, dpdk_input_node.index); + + vec_foreach (xd, dm->devices) + { + u32 n_buffers; + u32 mb_index; + uword n_rx_bytes = 0; + u32 n_trace, trace_cnt __attribute__((unused)); + vlib_buffer_free_list_t * fl; + u32 hash; + u64 hash_key; + u8 efd_discard_burst = 0; + + if (!xd->admin_up) + continue; + + n_buffers = dpdk_rx_burst(dm, xd, queue_id ); + + if (n_buffers == 0) + { + /* check if EFD (dpdk) is enabled */ + if (PREDICT_FALSE(dm->efd.enabled)) + { + /* reset a few stats */ + xd->efd_agent.last_poll_time = 0; + xd->efd_agent.last_burst_sz = 0; + } + continue; + } + + vec_reset_length (xd->d_trace_buffers); + trace_cnt = n_trace = vlib_get_trace_count (vm, node_trace); + + /* + * DAW-FIXME: VMXNET3 
device stop/start doesn't work, + * therefore fake the stop in the dpdk driver by + * silently dropping all of the incoming pkts instead of + * stopping the driver / hardware. + */ + if (PREDICT_FALSE(xd->admin_up != 1)) + { + for (mb_index = 0; mb_index < n_buffers; mb_index++) + rte_pktmbuf_free (xd->rx_vectors[queue_id][mb_index]); + continue; + } + + /* Check for congestion if EFD (Early-Fast-Discard) is enabled + * in any mode (e.g. dpdk, monitor, or drop_all) + */ + if (PREDICT_FALSE(dm->efd.enabled)) + { + /* update EFD counters */ + dpdk_efd_update_counters(xd, n_buffers, dm->efd.enabled); + + if (PREDICT_FALSE(dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED)) + { + /* discard all received packets */ + for (mb_index = 0; mb_index < n_buffers; mb_index++) + rte_pktmbuf_free(xd->rx_vectors[queue_id][mb_index]); + + xd->efd_agent.discard_cnt += n_buffers; + increment_efd_drop_counter(vm, + DPDK_ERROR_VLAN_EFD_DROP_PKTS, + n_buffers); + + continue; + } + + if (PREDICT_FALSE(xd->efd_agent.consec_full_frames_cnt >= + dm->efd.consec_full_frames_hi_thresh)) + { + u32 device_queue_sz = rte_eth_rx_queue_count(xd->device_index, + queue_id); + if (device_queue_sz >= dm->efd.queue_hi_thresh) + { + /* dpdk device queue has reached the critical threshold */ + xd->efd_agent.congestion_cnt++; + + /* apply EFD to packets from the burst */ + efd_discard_burst = 1; + } + } + } + + fl = vlib_buffer_get_free_list + (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + mb_index = 0; + + while (n_buffers > 0) + { + u32 bi0; + u8 next0, error0; + u32 l3_offset0; + vlib_buffer_t * b0, * b_seg, * b_chain = 0; + ethernet_header_t * h0; + u8 nb_seg = 1; + struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index]; + struct rte_mbuf *mb_seg = mb->next; + + if (PREDICT_TRUE(n_buffers > 1)) + { + struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index+2]; + vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1); + CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (bp->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + b0 = (vlib_buffer_t *)(mb+1); + + /* check whether EFD is looking for packets to discard */ + if (PREDICT_FALSE(efd_discard_burst)) + { + u32 cntr_type; + if (PREDICT_TRUE(cntr_type = is_efd_discardable(tm, b0, mb))) + { + rte_pktmbuf_free(mb); + xd->efd_agent.discard_cnt++; + increment_efd_drop_counter(vm, + cntr_type, + 1); + + n_buffers--; + mb_index++; + continue; + } + } + + /* Prefetch one next segment if it exists */ + if (PREDICT_FALSE(mb->nb_segs > 1)) + { + struct rte_mbuf *pfmb = mb->next; + vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1); + CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); + b_chain = b0; + } + + bi0 = vlib_get_buffer_index (vm, b0); + vlib_buffer_init_for_free_list (b0, fl); + b0->clone_count = 0; + + dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, + &next0, &error0); +#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS + /* + * Clear overloaded TX offload flags when a DPDK driver + * is using them for RX flags (e.g. Cisco VIC Ethernet driver) + */ + if (PREDICT_TRUE(trace_cnt == 0)) + mb->ol_flags &= PKT_EXT_RX_CLR_TX_FLAGS_MASK; + else + trace_cnt--; +#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ + + if (error0) + clib_warning ("bi %d error %d", bi0, error0); + + b0->error = 0; + + l3_offset0 = ((next0 == DPDK_RX_NEXT_IP4_INPUT || + next0 == DPDK_RX_NEXT_IP6_INPUT || + next0 == DPDK_RX_NEXT_MPLS_INPUT) ? 
+ sizeof (ethernet_header_t) : 0); + + b0->current_data = l3_offset0; + b0->current_length = mb->data_len - l3_offset0; + + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + + if (VMWARE_LENGTH_BUG_WORKAROUND) + b0->current_length -= 4; + + vnet_buffer(b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0; + vnet_buffer(b0)->io_handoff.next_index = next0; + n_rx_bytes += mb->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + { + ASSERT(mb_seg != 0); + + b_seg = (vlib_buffer_t *)(mb_seg+1); + vlib_buffer_init_for_free_list (b_seg, fl); + b_seg->clone_count = 0; + + ASSERT((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT(b_seg->current_data == 0); + + /* + * The driver (e.g. virtio) may not put the packet data at the start + * of the segment, so don't assume b_seg->current_data == 0 is correct. + */ + b_seg->current_data = (mb_seg->buf_addr + mb_seg->data_off) - (void *)b_seg->data; + + b_seg->current_length = mb_seg->data_len; + b0->total_length_not_including_first_buffer += + mb_seg->data_len; + + b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; + b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); + + b_chain = b_seg; + mb_seg = mb_seg->next; + nb_seg++; + } + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b0); + + if (PREDICT_FALSE (n_trace > mb_index)) + vec_add1 (xd->d_trace_buffers, bi0); + + next_worker_index = first_worker_index; + + /* + * Force unknown traffic onto worker 0, + * and into ethernet-input. $$$$ add more hashes. + */ + h0 = (ethernet_header_t *) b0->data; + + /* Compute ingress LB hash */ + hash_key = eth_get_key(h0); + hash = (u32)clib_xxhash(hash_key); + + if (PREDICT_TRUE (is_pow2(num_workers))) + next_worker_index += hash & (num_workers - 1); + else + next_worker_index += hash % num_workers; + + /* if EFD is enabled and not already discarding from dpdk, + * check the worker ring/queue for congestion + */ + if (PREDICT_FALSE(tm->efd.enabled && !efd_discard_burst)) + { + vlib_frame_queue_t *fq; + + /* fq will be valid if the ring is congested */ + fq = is_vlib_handoff_queue_congested( + next_worker_index, tm->efd.queue_hi_thresh, + congested_handoff_queue_by_worker_index); + + if (PREDICT_FALSE(fq != NULL)) + { + u32 cntr_type; + if (PREDICT_TRUE(cntr_type = + is_efd_discardable(tm, b0, mb))) + { + /* discard the packet */ + fq->enqueue_efd_discards++; + increment_efd_drop_counter(vm, cntr_type, 1); + rte_pktmbuf_free(mb); + n_buffers--; + mb_index++; + continue; + } + } + } + + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = dpdk_get_handoff_queue_elt( + next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_handoff_queue_elt(hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + + n_buffers--; + mb_index++; + } + + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers) > 0)) + { + /* credit the trace to the trace 
node */ + dpdk_rx_trace (dm, node_trace, xd, queue_id, xd->d_trace_buffers, + vec_len (xd->d_trace_buffers)); + vlib_set_trace_count (vm, node_trace, n_trace - vec_len (xd->d_trace_buffers)); + } + + vlib_increment_combined_counter + (vnet_get_main()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + xd->vlib_sw_if_index, + mb_index, n_rx_bytes); + + dpdk_worker_t * dw = vec_elt_at_index(dm->workers, cpu_index); + dw->aggregate_rx_packets += mb_index; + n_rx_packets += mb_index; + } + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_handoff_queue_elt(hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = (vlib_frame_queue_t *)(~0); + } + hf = 0; + current_worker_index = ~0; + return n_rx_packets; +} + +VLIB_REGISTER_NODE (dpdk_io_input_node) = { + .function = dpdk_io_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "dpdk-io-input", + + /* Will be enabled if/when hardware is detected. */ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_dpdk_rx_dma_trace, + + .n_errors = DPDK_N_ERROR, + .error_strings = dpdk_error_strings, + + .n_next_nodes = DPDK_RX_N_NEXT, + .next_nodes = { + [DPDK_RX_NEXT_DROP] = "error-drop", + [DPDK_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", + [DPDK_RX_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [DPDK_RX_NEXT_IP6_INPUT] = "ip6-input", + [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-gre-input", + }, +}; + +/* + * set_efd_bitmap() + * Based on the operation type, set lower/upper bits for the given index value + */ +void +set_efd_bitmap (u8 *bitmap, u32 value, u32 op) +{ + int ix; + + *bitmap = 0; + for (ix = 0; ix < 8; ix++) { + if (((op == EFD_OPERATION_LESS_THAN) && (ix < value)) || + ((op == EFD_OPERATION_GREATER_OR_EQUAL) && (ix >= value))){ + (*bitmap) |= (1 << ix); + } + } +} + +void +efd_config (u32 enabled, + u32 ip_prec, u32 ip_op, + u32 mpls_exp, u32 mpls_op, + u32 vlan_cos, u32 vlan_op) +{ + vlib_thread_main_t * tm = vlib_get_thread_main(); + dpdk_main_t * dm = &dpdk_main; + + if (enabled) { + tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED; + dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED; + } else { + tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED; + dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED; + } + + set_efd_bitmap(&tm->efd.ip_prec_bitmap, ip_prec, ip_op); + set_efd_bitmap(&tm->efd.mpls_exp_bitmap, mpls_exp, mpls_op); + set_efd_bitmap(&tm->efd.vlan_cos_bitmap, vlan_cos, vlan_op); + +} diff --git a/vnet/vnet/devices/dpdk/threads.c b/vnet/vnet/devices/dpdk/threads.c new file mode 100644 index 00000000000..aa32f1007c3 --- /dev/null +++ b/vnet/vnet/devices/dpdk/threads.c @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <signal.h> + +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/dpdk/dpdk.h> + +#include <vlibmemory/api.h> +#include <vlibmemory/vl_memory_msg_enum.h> /* enumerate all vlib messages */ + +#define vl_typedefs /* define message structures */ +#include <vlibmemory/vl_memory_api_h.h> +#undef vl_typedefs + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vlibmemory/vl_memory_api_h.h> +#undef vl_printfun + +vlib_thread_main_t vlib_thread_main; + +frame_queue_trace_t *frame_queue_traces; + +/* + * Check the frame queue to see if any frames are available. + * If so, pull the packets off the frames and put them to + * the handoff node. + */ +static inline int vlib_frame_queue_dequeue_internal (vlib_main_t *vm) +{ + u32 thread_id = vm->cpu_index; + vlib_frame_queue_t *fq = vlib_frame_queues[thread_id]; + vlib_frame_queue_elt_t *elt; + u32 * from, * to; + vlib_frame_t * f; + int msg_type; + int processed = 0; + u32 n_left_to_node; + u32 vectors = 0; + + ASSERT (fq); + ASSERT(vm == vlib_mains[thread_id]); + + /* + * Gather trace data for frame queues + */ + if (PREDICT_FALSE(fq->trace)) + { + frame_queue_trace_t *fqt; + u32 elix; + + fqt = &frame_queue_traces[thread_id]; + fqt->nelts = fq->nelts; + fqt->head = fq->head; + fqt->head_hint = fq->head_hint; + fqt->tail = fq->tail; + fqt->threshold = fq->vector_threshold; + fqt->n_in_use = fqt->tail - fqt->head; + if (fqt->n_in_use > fqt->nelts){ + fqt->n_in_use = 0; + } + + for (elix=0; elix<fqt->nelts; elix++) { + elt = fq->elts + ((fq->head+1 + elix) & (fq->nelts-1)); + if (1 || elt->valid) + { + fqt->n_vectors[elix] = elt->n_vectors; + } + } + fqt->written = 1; + } + + while (1) + { + if (fq->head == fq->tail) + { + fq->head_hint = fq->head; + return processed; + } + + elt = fq->elts + ((fq->head+1) & (fq->nelts-1)); + + if (!elt->valid) + { + fq->head_hint = fq->head; + return processed; + } + + from = elt->buffer_index; + msg_type = elt->msg_type; + + ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME); + ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE); + + f = vlib_get_frame_to_node + (vm, 1 ? handoff_dispatch_node.index : ethernet_input_node.index); + + to = vlib_frame_vector_args (f); + + n_left_to_node = elt->n_vectors; + + while (n_left_to_node >= 4) + { + to[0] = from[0]; + to[1] = from[1]; + to[2] = from[2]; + to[3] = from[3]; + to += 4; + from += 4; + n_left_to_node -= 4; + } + + while (n_left_to_node > 0) + { + to[0] = from[0]; + to++; + from++; + n_left_to_node--; + } + + vectors += elt->n_vectors; + f->n_vectors = elt->n_vectors; + vlib_put_frame_to_node + (vm, 1 ? 
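+     /* Descriptive note (added): the constant 1 pins the target to
+      * handoff-dispatch; ethernet-input remains only as the never-taken
+      * alternative of this conditional. */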
handoff_dispatch_node.index : ethernet_input_node.index, f); + + elt->valid = 0; + elt->n_vectors = 0; + elt->msg_type = 0xfefefefe; + CLIB_MEMORY_BARRIER(); + fq->head++; + processed++; + + /* + * Limit the number of packets pushed into the graph + */ + if (vectors >= fq->vector_threshold) + { + fq->head_hint = fq->head; + return processed; + } + } + ASSERT(0); + return processed; +} + +int dpdk_frame_queue_dequeue (vlib_main_t *vm) +{ + return vlib_frame_queue_dequeue_internal (vm); +} + +/* + * dpdk_worker_thread - Contains the main loop of a worker thread. + * + * w + * Information for the current thread + * io_name + * The name of thread performing dpdk device IO (if any). If there are no + * instances of that thread, then the current thread will do dpdk device + * polling. Ports will be divided among instances of the current thread. + * callback + * If not null, this function will be called once during each main loop. + */ +static_always_inline void +dpdk_worker_thread_internal (vlib_main_t *vm, + dpdk_worker_thread_callback_t callback, + int have_io_threads) +{ + vlib_node_main_t * nm = &vm->node_main; + u64 cpu_time_now = clib_cpu_time_now (); + + while (1) + { + vlib_worker_thread_barrier_check (); + + vlib_frame_queue_dequeue_internal (vm); + + /* Invoke callback if supplied */ + if (PREDICT_FALSE(callback != NULL)) + callback(vm); + + if (!have_io_threads) + { + vlib_node_runtime_t * n; + vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]) + { + cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, + VLIB_NODE_STATE_POLLING, /* frame */ 0, + cpu_time_now); + } + + } + + if (_vec_len (nm->pending_frames)) + { + int i; + cpu_time_now = clib_cpu_time_now (); + for (i = 0; i < _vec_len (nm->pending_frames); i++) { + vlib_pending_frame_t *p; + + p = nm->pending_frames + i; + + cpu_time_now = dispatch_pending_node (vm, p, cpu_time_now); + } + _vec_len (nm->pending_frames) = 0; + } + vlib_increment_main_loop_counter (vm); + + /* Record time stamp in case there are no enabled nodes and above + calls do not update time stamp. */ + cpu_time_now = clib_cpu_time_now (); + } +} + +void dpdk_worker_thread (vlib_worker_thread_t * w, + char *io_name, + dpdk_worker_thread_callback_t callback) +{ + vlib_main_t *vm; + uword * p; + vlib_thread_main_t * tm = vlib_get_thread_main(); + vlib_thread_registration_t * tr; + dpdk_main_t * dm = &dpdk_main; + + vm = vlib_get_main(); + + ASSERT(vm->cpu_index == os_get_cpu_number()); + + clib_time_init (&vm->clib_time); + clib_mem_set_heap (w->thread_mheap); + + /* Wait until the dpdk init sequence is complete */ + while (dm->io_thread_release == 0) + vlib_worker_thread_barrier_check (); + + /* any I/O threads? 
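+   * If the registration named by io_name (e.g. "io") exists with a non-zero
+   * thread count, device polling is done by the I/O threads and this worker
+   * only drains its handoff frame queue; otherwise the worker polls the
+   * DPDK input nodes itself.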
*/ + p = hash_get_mem (tm->thread_registrations_by_name, io_name); + tr = (vlib_thread_registration_t *)p[0]; + + if (tr && tr->count > 0) + dpdk_worker_thread_internal(vm, callback, /* have_io_threads */ 1); + else + dpdk_worker_thread_internal(vm, callback, /* have_io_threads */ 0); +} + +void dpdk_worker_thread_fn (void * arg) +{ + vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_worker_thread_init (w); + dpdk_worker_thread (w, "io", 0); +} + +#if VIRL == 0 +VLIB_REGISTER_THREAD (worker_thread_reg, static) = { + .name = "workers", + .short_name = "wk", + .function = dpdk_worker_thread_fn, + .mheap_size = 256<<20, +}; +#endif + +void dpdk_io_thread_fn (void * arg) +{ + vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_worker_thread_init (w); + dpdk_io_thread (w, 0, 0, "workers", 0); +} + +#if VIRL == 0 +VLIB_REGISTER_THREAD (io_thread_reg, static) = { + .name = "io", + .short_name = "io", + .function = dpdk_io_thread_fn, + .mheap_size = 256<<20, +}; +#endif + +static void vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) +{ + vl_api_rpc_reply_t * rmp; + int (*fp)(void *); + i32 rv = 0; + vlib_main_t * vm = vlib_get_main(); + + if (mp->function == 0) + { + rv = -1; + clib_warning ("rpc NULL function pointer"); + } + + else + { + if (mp->need_barrier_sync) + vlib_worker_thread_barrier_sync (vm); + + fp = (void *)(mp->function); + rv = (*fp)(mp->data); + + if (mp->need_barrier_sync) + vlib_worker_thread_barrier_release (vm); + } + + if (mp->send_reply) + { + unix_shared_memory_queue_t * q = + vl_api_client_index_to_input_queue (mp->client_index); + if (q) + { + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_RPC_REPLY); + rmp->context = mp->context; + rmp->retval = rv; + vl_msg_api_send_shmem (q, (u8 *)&rmp); + } + } + if (mp->multicast) + { + clib_warning ("multicast not yet implemented..."); + } +} + +static void vl_api_rpc_reply_t_handler (vl_api_rpc_reply_t * mp) +{ clib_warning ("unimplemented"); } + +void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) +{ + vl_api_rpc_call_t * mp; + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + + mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + data_length); + memset (mp, 0, sizeof (*mp)); + memcpy (mp->data, data, data_length); + mp->_vl_msg_id = ntohs (VL_API_RPC_CALL); + mp->function = (u64)fp; + mp->need_barrier_sync = 1; + + /* Use the "normal" control-plane mechanism for the main thread */ + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *)&mp); +} + + +#define foreach_rpc_api_msg \ +_(RPC_CALL,rpc_call) \ +_(RPC_REPLY,rpc_reply) + +static clib_error_t * +rpc_api_hookup (vlib_main_t *vm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 0 /* do not trace */); + foreach_rpc_api_msg; +#undef _ + return 0; +} + +VLIB_API_INIT_FUNCTION(rpc_api_hookup); diff --git a/vnet/vnet/devices/dpdk/threads.h b/vnet/vnet/devices/dpdk/threads.h new file mode 100644 index 00000000000..8f0fcbdb465 --- /dev/null +++ b/vnet/vnet/devices/dpdk/threads.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_dpdk_threads_h__ +#define __included_dpdk_threads_h__ + +#include <vnet/vnet.h> + +void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + +typedef void (*dpdk_worker_thread_callback_t) (vlib_main_t *vm); + +void dpdk_worker_thread (vlib_worker_thread_t * w, + char *io_name, + dpdk_worker_thread_callback_t callback); + +int dpdk_frame_queue_dequeue (vlib_main_t *vm); + +#endif /* __included_dpdk_threads_h__ */ diff --git a/vnet/vnet/devices/dpdk/vhost_user.c b/vnet/vnet/devices/dpdk/vhost_user.c new file mode 100644 index 00000000000..5ab4c22ed3e --- /dev/null +++ b/vnet/vnet/devices/dpdk/vhost_user.c @@ -0,0 +1,1550 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/stat.h> +#include <sys/vfs.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> + +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/dpdk/dpdk.h> + +#include <vnet/devices/virtio/vhost-user.h> + +#define VHOST_USER_DEBUG_SOCKET 0 + +#if VHOST_USER_DEBUG_SOCKET == 1 +#define DBG_SOCK(args...) clib_warning(args); +#else +#define DBG_SOCK(args...) +#endif + +/* + * DPDK vhost-user functions + */ + +/* portions taken from dpdk + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +static uint64_t +qva_to_vva(struct virtio_net *dev, uint64_t qemu_va) +{ + struct virtio_memory_regions *region; + uint64_t vhost_va = 0; + uint32_t regionidx = 0; + + /* Find the region where the address lives. */ + for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { + region = &dev->mem->regions[regionidx]; + if ((qemu_va >= region->userspace_address) && + (qemu_va <= region->userspace_address + + region->memory_size)) { + vhost_va = qemu_va + region->guest_phys_address + + region->address_offset - + region->userspace_address; + break; + } + } + return vhost_va; +} + +static dpdk_device_t * +dpdk_vhost_user_device_from_hw_if_index(u32 hw_if_index) +{ + vnet_main_t *vnm = vnet_get_main(); + dpdk_main_t * dm = &dpdk_main; + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + if (xd->dev_type != VNET_DPDK_DEV_VHOST_USER) + return 0; + + return xd; +} + +static dpdk_device_t * +dpdk_vhost_user_device_from_sw_if_index(u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main(); + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, sw_if_index); + ASSERT (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + + return dpdk_vhost_user_device_from_hw_if_index(sw->hw_if_index); +} + +static inline void * map_guest_mem(dpdk_device_t * xd, u64 addr) +{ + dpdk_vu_intf_t * vui = xd->vu_intf; + struct virtio_memory * mem = xd->vu_vhost_dev.mem; + int i; + for (i=0; i<mem->nregions; i++) { + if ((mem->regions[i].guest_phys_address <= addr) && + ((mem->regions[i].guest_phys_address + mem->regions[i].memory_size) > addr)) { + return (void *) (vui->region_addr[i] + addr - mem->regions[i].guest_phys_address); + } + } + DBG_SOCK("failed to map guest mem addr %llx", addr); + return 0; +} + +static clib_error_t * +dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id) +{ + dpdk_main_t * dm = &dpdk_main; + vlib_main_t * vm = vlib_get_main(); + vlib_thread_main_t * tm = vlib_get_thread_main(); + vnet_sw_interface_t * sw; + clib_error_t * error; + dpdk_device_and_queue_t * dq; + + dpdk_device_t * xd = NULL; + u8 addr[6]; + int j; + + vlib_worker_thread_barrier_sync (vm); + + int inactive_cnt = vec_len(dm->vu_inactive_interfaces_device_index); + // if there are any inactive ifaces + if (inactive_cnt > 0) { + // take last + u32 vui_idx = dm->vu_inactive_interfaces_device_index[inactive_cnt - 1]; + if (vec_len(dm->devices) > vui_idx) { + xd = vec_elt_at_index (dm->devices, vui_idx); + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) { + DBG_SOCK("reusing inactive vhost-user interface sw_if_index %d", xd->vlib_sw_if_index); + } else { + clib_warning("error: inactive vhost-user interface sw_if_index %d not VHOST_USER type!", + xd->vlib_sw_if_index); + // reset so new interface is created + xd = NULL; + } + } + // "remove" from inactive list + _vec_len(dm->vu_inactive_interfaces_device_index) -= 1; + } + + if (xd) { + // existing interface used - do not overwrite if_id 
if not needed + if (if_id != (u32)~0) + xd->vu_if_id = if_id; + + // reset virtqueues + for (j = 0; j < VIRTIO_QNUM; j++) + { + memset(xd->vu_vhost_dev.virtqueue[j], 0, sizeof(struct vhost_virtqueue)); + } + // reset lockp + memset ((void *) xd->lockp, 0, CLIB_CACHE_LINE_BYTES); + + // reset tx vectors + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + sizeof(tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + // reset rx vector + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE-1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + } else { + // vui was not retrieved from inactive ifaces - create new + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->dev_type = VNET_DPDK_DEV_VHOST_USER; + xd->rx_q_used = 1; + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, CLIB_CACHE_LINE_BYTES); + + if (if_id == (u32)~0) + xd->vu_if_id = dm->next_vu_if_id++; + else + xd->vu_if_id = if_id; + + xd->device_index = xd - dm->devices; + xd->per_interface_next_index = ~0; + xd->vu_intf = NULL; + + xd->vu_vhost_dev.mem = clib_mem_alloc (sizeof(struct virtio_memory) + + VHOST_MEMORY_MAX_NREGIONS * + sizeof(struct virtio_memory_regions)); + + for (j = 0; j < VIRTIO_QNUM; j++) + { + xd->vu_vhost_dev.virtqueue[j] = clib_mem_alloc (sizeof(struct vhost_virtqueue)); + memset(xd->vu_vhost_dev.virtqueue[j], 0, sizeof(struct vhost_virtqueue)); + } + + xd->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) xd->lockp, 0, CLIB_CACHE_LINE_BYTES); + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + sizeof(tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + // reset rx vector + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE-1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + + vec_validate_aligned (xd->frames, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + } + { + f64 now = vlib_time_now(vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (addr+2, &rnd, sizeof(rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } + + error = ethernet_register_interface + (dm->vnet_main, + dpdk_device_class.index, + xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, + 0); + + if (error) + return error; + + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + + if (!xd->vu_intf) + xd->vu_intf = clib_mem_alloc (sizeof(*(xd->vu_intf))); + + *hw_if_index = xd->vlib_hw_if_index; + + int cpu = (xd->device_index % dm->input_cpu_count) + + dm->input_cpu_first_index; + + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + // start polling if it was not started yet (because of no phys ifaces) + if (tm->n_vlib_mains == 1 && dpdk_input_node.state != VLIB_NODE_STATE_POLLING) + vlib_node_set_state (vm, dpdk_input_node.index, VLIB_NODE_STATE_POLLING); + + if (tm->n_vlib_mains > 1 && tm->main_thread_is_io_node) + vlib_node_set_state (vm, dpdk_io_input_node.index, VLIB_NODE_STATE_POLLING); + + if (tm->n_vlib_mains > 1 && !tm->main_thread_is_io_node) + vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + 
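+  /* Descriptive note (added): at this point the interface has an ethernet
+   * registration with a random, locally administered MAC (02:fe:xx:xx:xx:xx),
+   * is assigned round-robin to an input cpu, and the relevant dpdk input node
+   * has been set to polling where required, so the worker barrier can be
+   * released. */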
+ vlib_worker_thread_barrier_release (vm); + return 0; +} + +static clib_error_t * +dpdk_vhost_user_get_features(u32 hw_if_index, u64 * features) +{ + *features = rte_vhost_feature_get(); + + DBG_SOCK("supported features: 0x%x", *features); + return 0; +} + +static clib_error_t * +dpdk_vhost_user_set_features(u32 hw_if_index, u64 features) +{ + dpdk_device_t * xd; + u16 hdr_len = sizeof(struct virtio_net_hdr); + + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { + clib_warning("not a vhost-user interface"); + return 0; + } + + xd->vu_vhost_dev.features = features; + + if (xd->vu_vhost_dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF)) + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + + xd->vu_vhost_dev.virtqueue[VIRTIO_RXQ]->vhost_hlen = hdr_len; + xd->vu_vhost_dev.virtqueue[VIRTIO_TXQ]->vhost_hlen = hdr_len; + + xd->vu_is_running = 0; + + return 0; +} + +static clib_error_t * +dpdk_vhost_user_set_mem_table(u32 hw_if_index, vhost_user_memory_t * vum, int fd[]) +{ + struct virtio_memory * mem; + int i; + dpdk_device_t * xd; + dpdk_vu_intf_t * vui; + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { + clib_warning("not a vhost-user interface"); + return 0; + } + + vui = xd->vu_intf; + mem = xd->vu_vhost_dev.mem; + + mem->nregions = vum->nregions; + + for (i=0; i < mem->nregions; i++) { + u64 mapped_size, mapped_address; + + mem->regions[i].guest_phys_address = vum->regions[i].guest_phys_addr; + mem->regions[i].guest_phys_address_end = vum->regions[i].guest_phys_addr + + vum->regions[i].memory_size; + mem->regions[i].memory_size = vum->regions[i].memory_size; + mem->regions[i].userspace_address = vum->regions[i].userspace_addr; + + mapped_size = mem->regions[i].memory_size + vum->regions[i].mmap_offset; + mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mapped_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd[i], 0); + + if ((void *)mapped_address == MAP_FAILED) + { + clib_warning("mmap error"); + return 0; + } + + mapped_address += vum->regions[i].mmap_offset; + vui->region_addr[i] = mapped_address; + vui->region_fd[i] = fd[i]; + mem->regions[i].address_offset = mapped_address - mem->regions[i].guest_phys_address; + + if (vum->regions[i].guest_phys_addr == 0) { + mem->base_address = vum->regions[i].userspace_addr; + mem->mapped_address = mem->regions[i].address_offset; + } + } + + xd->vu_is_running = 0; + + DBG_SOCK("done"); + return 0; +} + +static clib_error_t * +dpdk_vhost_user_set_vring_num(u32 hw_if_index, u8 idx, u32 num) +{ + dpdk_device_t * xd; + struct vhost_virtqueue *vq; + + DBG_SOCK("idx %u num %u", idx, num); + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { + clib_warning("not a vhost-user interface"); + return 0; + } + vq = xd->vu_vhost_dev.virtqueue[idx]; + vq->size = num; + + xd->vu_is_running = 0; + + return 0; +} + +static clib_error_t * +dpdk_vhost_user_set_vring_addr(u32 hw_if_index, u8 idx, u64 desc, u64 used, u64 avail) +{ + dpdk_device_t * xd; + struct vhost_virtqueue *vq; + + DBG_SOCK("idx %u desc 0x%x used 0x%x avail 0x%x", idx, desc, used, avail); + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { + clib_warning("not a vhost-user interface"); + return 0; + } + vq = xd->vu_vhost_dev.virtqueue[idx]; + + vq->desc = (struct vring_desc *) qva_to_vva(&xd->vu_vhost_dev, desc); + vq->used = (struct vring_used *) qva_to_vva(&xd->vu_vhost_dev, used); + vq->avail = (struct vring_avail *) qva_to_vva(&xd->vu_vhost_dev, avail); + + if (!(vq->desc && vq->used && vq->avail)) { + clib_warning("falied to 
set vring addr"); + } + + xd->vu_is_running = 0; + + return 0; +} + +static clib_error_t * +dpdk_vhost_user_get_vring_base(u32 hw_if_index, u8 idx, u32 * num) +{ + dpdk_device_t * xd; + struct vhost_virtqueue *vq; + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { + clib_warning("not a vhost-user interface"); + return 0; + } + + vq = xd->vu_vhost_dev.virtqueue[idx]; + *num = vq->last_used_idx; + + DBG_SOCK("idx %u num %u", idx, *num); + return 0; +} + +static clib_error_t * +dpdk_vhost_user_set_vring_base(u32 hw_if_index, u8 idx, u32 num) +{ + dpdk_device_t * xd; + struct vhost_virtqueue *vq; + + DBG_SOCK("idx %u num %u", idx, num); + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { + clib_warning("not a vhost-user interface"); + return 0; + } + + vq = xd->vu_vhost_dev.virtqueue[idx]; + vq->last_used_idx = num; + vq->last_used_idx_res = num; + + xd->vu_is_running = 0; + + return 0; +} + +static clib_error_t * +dpdk_vhost_user_set_vring_kick(u32 hw_if_index, u8 idx, int fd) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + struct vhost_virtqueue *vq, *vq0, *vq1; + + DBG_SOCK("idx %u fd %d", idx, fd); + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { + clib_warning("not a vhost-user interface"); + return 0; + } + + vq = xd->vu_vhost_dev.virtqueue[idx]; + vq->kickfd = fd; + + vq0 = xd->vu_vhost_dev.virtqueue[0]; + vq1 = xd->vu_vhost_dev.virtqueue[1]; + + if (vq0->desc && vq0->avail && vq0->used && + vq1->desc && vq1->avail && vq1->used) { + xd->vu_is_running = 1; + if (xd->admin_up) + vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP | + ETH_LINK_FULL_DUPLEX ); + } + + return 0; +} + + +static clib_error_t * +dpdk_vhost_user_set_vring_call(u32 hw_if_index, u8 idx, int fd) +{ + dpdk_device_t * xd; + struct vhost_virtqueue *vq; + + DBG_SOCK("idx %u fd %d", idx, fd); + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { + clib_warning("not a vhost-user interface"); + return 0; + } + + vq = xd->vu_vhost_dev.virtqueue[idx]; + /* reset callfd to force no interrupts */ + vq->callfd = -1; + + return 0; +} + +u8 +dpdk_vhost_user_want_interrupt(dpdk_device_t *xd, int idx) +{ + dpdk_vu_intf_t *vui = xd->vu_intf; + ASSERT(vui != NULL); + + if (PREDICT_FALSE(vui->num_vrings <= 0)) + return 0; + + dpdk_vu_vring *vring = &(vui->vrings[idx]); + struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx]; + + /* return if vm is interested in interrupts */ + return (vring->callfd > 0) && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT); +} + +void +dpdk_vhost_user_send_interrupt(vlib_main_t * vm, dpdk_device_t * xd, int idx) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_vu_intf_t *vui = xd->vu_intf; + ASSERT(vui != NULL); + + if (PREDICT_FALSE(vui->num_vrings <= 0)) + return; + + dpdk_vu_vring *vring = &(vui->vrings[idx]); + struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx]; + + /* if vm is interested in interrupts */ + if((vring->callfd > 0) && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + u64 x = 1; + int rv __attribute__((unused)); + /* $$$$ pay attention to rv */ + rv = write(vring->callfd, &x, sizeof(x)); + vring->n_since_last_int = 0; + vring->int_deadline = vlib_time_now(vm) + dm->vhost_coalesce_time; + } +} + +/* + * vhost-user interface management functions + */ + +// initialize vui with specified attributes +static void +dpdk_vhost_user_vui_init(vnet_main_t * vnm, + dpdk_device_t *xd, int sockfd, + const char * sock_filename, + u8 is_server, u64 
feature_mask, + u32 * sw_if_index) +{ + dpdk_vu_intf_t *vui = xd->vu_intf; + memset(vui, 0, sizeof(*vui)); + + vui->unix_fd = sockfd; + vui->num_vrings = 2; + vui->sock_is_server = is_server; + strncpy(vui->sock_filename, sock_filename, ARRAY_LEN(vui->sock_filename)-1); + vui->sock_errno = 0; + vui->is_up = 0; + vui->feature_mask = feature_mask; + vui->active = 1; + vui->unix_file_index = ~0; + + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + + if (sw_if_index) + *sw_if_index = xd->vlib_sw_if_index; +} + +// register vui and start polling on it +static void +dpdk_vhost_user_vui_register(vlib_main_t * vm, dpdk_device_t *xd) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_vu_intf_t *vui = xd->vu_intf; + + hash_set (dm->vu_sw_if_index_by_listener_fd, vui->unix_fd, + xd->vlib_sw_if_index); +} + +static inline void +dpdk_vhost_user_if_disconnect(dpdk_device_t * xd) +{ + dpdk_vu_intf_t *vui = xd->vu_intf; + vnet_main_t * vnm = vnet_get_main(); + dpdk_main_t * dm = &dpdk_main; + + xd->admin_up = 0; + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + + if (vui->unix_file_index != ~0) { + unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index); + vui->unix_file_index = ~0; + } + + hash_unset(dm->vu_sw_if_index_by_sock_fd, vui->unix_fd); + hash_unset(dm->vu_sw_if_index_by_listener_fd, vui->unix_fd); + close(vui->unix_fd); + vui->unix_fd = -1; + vui->is_up = 0; + + DBG_SOCK("interface ifindex %d disconnected", xd->vlib_sw_if_index); +} + +static clib_error_t * dpdk_vhost_user_callfd_read_ready (unix_file_t * uf) +{ + __attribute__((unused)) int n; + u8 buff[8]; + n = read(uf->file_descriptor, ((char*)&buff), 8); + return 0; +} + +static clib_error_t * dpdk_vhost_user_socket_read (unix_file_t * uf) +{ + int n; + int fd, number_of_fds = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + vhost_user_msg_t msg; + struct msghdr mh; + struct iovec iov[1]; + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t *xd; + dpdk_vu_intf_t *vui; + struct cmsghdr *cmsg; + uword * p; + u8 q; + unix_file_t template = {0}; + vnet_main_t * vnm = vnet_get_main(); + + p = hash_get (dm->vu_sw_if_index_by_sock_fd, uf->file_descriptor); + if (p == 0) { + DBG_SOCK ("FD %d doesn't belong to any interface", + uf->file_descriptor); + return 0; + } + else + xd = dpdk_vhost_user_device_from_sw_if_index(p[0]); + + ASSERT(xd != NULL); + vui = xd->vu_intf; + + char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))]; + + memset(&mh, 0, sizeof(mh)); + memset(control, 0, sizeof(control)); + + /* set the payload */ + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = VHOST_USER_MSG_HDR_SZ; + + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = control; + mh.msg_controllen = sizeof(control); + + n = recvmsg(uf->file_descriptor, &mh, 0); + + if (n != VHOST_USER_MSG_HDR_SZ) + goto close_socket; + + if (mh.msg_flags & MSG_CTRUNC) { + goto close_socket; + } + + cmsg = CMSG_FIRSTHDR(&mh); + + if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS) && + (cmsg->cmsg_len - CMSG_LEN(0) <= VHOST_MEMORY_MAX_NREGIONS * sizeof(int))) { + number_of_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + memcpy(fds, CMSG_DATA(cmsg), number_of_fds * sizeof(int)); + } + + /* version 1, no reply bit set*/ + if ((msg.flags & 7) != 1) { + DBG_SOCK("malformed message received. 
closing socket"); + goto close_socket; + } + + { + int rv __attribute__((unused)); + /* $$$$ pay attention to rv */ + rv = read(uf->file_descriptor, ((char*)&msg) + n, msg.size); + } + + switch (msg.request) { + case VHOST_USER_GET_FEATURES: + DBG_SOCK("if %d msg VHOST_USER_GET_FEATURES", + xd->vlib_hw_if_index); + + msg.flags |= 4; + + dpdk_vhost_user_get_features(xd->vlib_hw_if_index, &msg.u64); + msg.u64 &= vui->feature_mask; + msg.size = sizeof(msg.u64); + break; + + case VHOST_USER_SET_FEATURES: + DBG_SOCK("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx", + xd->vlib_hw_if_index, msg.u64); + + dpdk_vhost_user_set_features(xd->vlib_hw_if_index, msg.u64); + break; + + case VHOST_USER_SET_MEM_TABLE: + DBG_SOCK("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d", + xd->vlib_hw_if_index, msg.memory.nregions); + + if ((msg.memory.nregions < 1) || + (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS)) { + + DBG_SOCK("number of mem regions must be between 1 and %i", + VHOST_MEMORY_MAX_NREGIONS); + + goto close_socket; + } + + if (msg.memory.nregions != number_of_fds) { + DBG_SOCK("each memory region must have FD"); + goto close_socket; + } + + dpdk_vhost_user_set_mem_table(xd->vlib_hw_if_index, &msg.memory, fds); + break; + + case VHOST_USER_SET_VRING_NUM: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d", + xd->vlib_hw_if_index, msg.state.index, msg.state.num); + + if ((msg.state.num > 32768) || /* maximum ring size is 32768 */ + (msg.state.num == 0) || /* it cannot be zero */ + (msg.state.num % 2)) /* must be power of 2 */ + goto close_socket; + + dpdk_vhost_user_set_vring_num(xd->vlib_hw_if_index, msg.state.index, msg.state.num); + break; + + case VHOST_USER_SET_VRING_ADDR: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_ADDR idx %d", + xd->vlib_hw_if_index, msg.state.index); + + dpdk_vhost_user_set_vring_addr(xd->vlib_hw_if_index, msg.state.index, + msg.addr.desc_user_addr, + msg.addr.used_user_addr, + msg.addr.avail_user_addr); + break; + + case VHOST_USER_SET_OWNER: + DBG_SOCK("if %d msg VHOST_USER_SET_OWNER", + xd->vlib_hw_if_index); + break; + + case VHOST_USER_RESET_OWNER: + DBG_SOCK("if %d msg VHOST_USER_RESET_OWNER", + xd->vlib_hw_if_index); + break; + + case VHOST_USER_SET_VRING_CALL: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_CALL u64 %d", + xd->vlib_hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + if (!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + goto close_socket; + + /* if there is old fd, delete it */ + if (vui->vrings[q].callfd) { + unix_file_t * uf = pool_elt_at_index (unix_main.file_pool, + vui->vrings[q].callfd_idx); + unix_file_del (&unix_main, uf); + } + vui->vrings[q].callfd = fds[0]; + template.read_function = dpdk_vhost_user_callfd_read_ready; + template.file_descriptor = fds[0]; + vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template); + } + else + vui->vrings[q].callfd = -1; + + dpdk_vhost_user_set_vring_call(xd->vlib_hw_if_index, q, vui->vrings[q].callfd); + break; + + case VHOST_USER_SET_VRING_KICK: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_KICK u64 %d", + xd->vlib_hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + if (!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + goto close_socket; + + vui->vrings[q].kickfd = fds[0]; + } + else + vui->vrings[q].kickfd = -1; + + dpdk_vhost_user_set_vring_kick(xd->vlib_hw_if_index, q, vui->vrings[q].kickfd); + break; + + case VHOST_USER_SET_VRING_ERR: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_ERR u64 %d", + xd->vlib_hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + if 
(!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + goto close_socket; + + fd = fds[0]; + } + else + fd = -1; + + vui->vrings[q].errfd = fd; + break; + + case VHOST_USER_SET_VRING_BASE: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d", + xd->vlib_hw_if_index, msg.state.index, msg.state.num); + + dpdk_vhost_user_set_vring_base(xd->vlib_hw_if_index, msg.state.index, msg.state.num); + break; + + case VHOST_USER_GET_VRING_BASE: + DBG_SOCK("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", + xd->vlib_hw_if_index, msg.state.index, msg.state.num); + + msg.flags |= 4; + msg.size = sizeof(msg.state); + + dpdk_vhost_user_get_vring_base(xd->vlib_hw_if_index, msg.state.index, &msg.state.num); + break; + + case VHOST_USER_NONE: + DBG_SOCK("if %d msg VHOST_USER_NONE", + xd->vlib_hw_if_index); + break; + + case VHOST_USER_SET_LOG_BASE: + DBG_SOCK("if %d msg VHOST_USER_SET_LOG_BASE", + xd->vlib_hw_if_index); + break; + + case VHOST_USER_SET_LOG_FD: + DBG_SOCK("if %d msg VHOST_USER_SET_LOG_FD", + xd->vlib_hw_if_index); + break; + + default: + DBG_SOCK("unknown vhost-user message %d received. closing socket", + msg.request); + goto close_socket; + } + + /* if we have pointers to descriptor table, go up*/ + if (!vui->is_up && + xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_TX]->desc && + xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_RX]->desc) { + + DBG_SOCK("interface %d connected", xd->vlib_sw_if_index); + + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); + vui->is_up = 1; + } + + /* if we need to reply */ + if (msg.flags & 4) + { + n = send(uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) + goto close_socket; + } + + return 0; + +close_socket: + DBG_SOCK("error: close_socket"); + dpdk_vhost_user_if_disconnect(xd); + return 0; +} + +static clib_error_t * dpdk_vhost_user_socket_error (unix_file_t * uf) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t *xd; + uword * p; + + p = hash_get (dm->vu_sw_if_index_by_sock_fd, uf->file_descriptor); + if (p == 0) { + DBG_SOCK ("FD %d doesn't belong to any interface", + uf->file_descriptor); + return 0; + } + else + xd = dpdk_vhost_user_device_from_sw_if_index(p[0]); + + dpdk_vhost_user_if_disconnect(xd); + return 0; +} + +static clib_error_t * dpdk_vhost_user_socksvr_accept_ready (unix_file_t * uf) +{ + int client_fd, client_len; + struct sockaddr_un client; + unix_file_t template = {0}; + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = NULL; + dpdk_vu_intf_t * vui; + uword * p; + + p = hash_get (dm->vu_sw_if_index_by_listener_fd, + uf->file_descriptor); + if (p == 0) { + DBG_SOCK ("fd %d doesn't belong to any interface", + uf->file_descriptor); + return 0; + } + + xd = dpdk_vhost_user_device_from_sw_if_index(p[0]); + ASSERT(xd != NULL); + vui = xd->vu_intf; + + client_len = sizeof(client); + client_fd = accept (uf->file_descriptor, + (struct sockaddr *)&client, + (socklen_t *)&client_len); + + if (client_fd < 0) + return clib_error_return_unix (0, "accept"); + + template.read_function = dpdk_vhost_user_socket_read; + template.error_function = dpdk_vhost_user_socket_error; + template.file_descriptor = client_fd; + vui->unix_file_index = unix_file_add (&unix_main, &template); + + vui->client_fd = client_fd; + hash_set (dm->vu_sw_if_index_by_sock_fd, vui->client_fd, + xd->vlib_sw_if_index); + + return 0; +} + +// init server socket on specified sock_filename +static int dpdk_vhost_user_init_server_sock(const char * sock_filename, int 
*sockfd) +{ + int rv = 0, len; + struct sockaddr_un un; + int fd; + /* create listening socket */ + fd = socket(AF_UNIX, SOCK_STREAM, 0); + + if (fd < 0) { + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + + un.sun_family = AF_UNIX; + strcpy((char *) un.sun_path, (char *) sock_filename); + + /* remove if exists */ + unlink( (char *) sock_filename); + + len = strlen((char *) un.sun_path) + strlen((char *) sock_filename); + + if (bind(fd, (struct sockaddr *) &un, len) == -1) { + rv = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + + if (listen(fd, 1) == -1) { + rv = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + + unix_file_t template = {0}; + template.read_function = dpdk_vhost_user_socksvr_accept_ready; + template.file_descriptor = fd; + unix_file_add (&unix_main, &template); + *sockfd = fd; + return rv; + +error: + close(fd); + return rv; +} + +/* + * vhost-user interface control functions used from vpe api + */ + +int dpdk_vhost_user_create_if(vnet_main_t * vnm, vlib_main_t * vm, + const char * sock_filename, + u8 is_server, + u32 * sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t *xd; + u32 hw_if_idx = ~0; + int sockfd = -1; + int rv = 0; + + // using virtio vhost user? + if (dm->use_virtio_vhost) { + return vhost_user_create_if(vnm, vm, sock_filename, is_server, + sw_if_index, feature_mask, renumber, custom_dev_instance); + } + + if (is_server) { + if ((rv = dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) { + return rv; + } + } + + if (renumber) { + // set next vhost-user if id if custom one is higher or equal + if (custom_dev_instance >= dm->next_vu_if_id) + dm->next_vu_if_id = custom_dev_instance + 1; + + dpdk_create_vhost_user_if_internal(&hw_if_idx, custom_dev_instance); + } else + dpdk_create_vhost_user_if_internal(&hw_if_idx, (u32)~0); + DBG_SOCK("dpdk vhost-user interface created hw_if_index %d", hw_if_idx); + + xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_idx); + ASSERT(xd != NULL); + + dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server, + feature_mask, sw_if_index); + + dpdk_vhost_user_vui_register (vm, xd); + return rv; +} + +int dpdk_vhost_user_modify_if(vnet_main_t * vnm, vlib_main_t * vm, + const char * sock_filename, + u8 is_server, + u32 sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + dpdk_vu_intf_t * vui = NULL; + u32 sw_if_idx = ~0; + int sockfd = -1; + int rv = 0; + + // using virtio vhost user? 
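+  // Note (added): when use_virtio_vhost is set, the request is delegated to the
+  // native virtio vhost-user implementation; otherwise the DPDK-backed interface
+  // below is torn down and reinitialized on the new socket.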
+ if (dm->use_virtio_vhost) { + return vhost_user_modify_if(vnm, vm, sock_filename, is_server, + sw_if_index, feature_mask, renumber, custom_dev_instance); + } + + xd = dpdk_vhost_user_device_from_sw_if_index(sw_if_index); + + if (xd == NULL) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + vui = xd->vu_intf; + + // interface is inactive + vui->active = 0; + // disconnect interface sockets + dpdk_vhost_user_if_disconnect(xd); + + if (is_server) { + if ((rv = dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) { + return rv; + } + } + + dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server, + feature_mask, &sw_if_idx); + + if (renumber) { + vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); + } + + dpdk_vhost_user_vui_register (vm, xd); + + return rv; +} + +int dpdk_vhost_user_delete_if(vnet_main_t * vnm, vlib_main_t * vm, + u32 sw_if_index) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd = NULL; + dpdk_vu_intf_t * vui; + int rv = 0; + + // using virtio vhost user? + if (dm->use_virtio_vhost) { + return vhost_user_delete_if(vnm, vm, sw_if_index); + } + + xd = dpdk_vhost_user_device_from_sw_if_index(sw_if_index); + + if (xd == NULL) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + vui = xd->vu_intf; + + // interface is inactive + vui->active = 0; + // disconnect interface sockets + dpdk_vhost_user_if_disconnect(xd); + // add to inactive interface list + vec_add1 (dm->vu_inactive_interfaces_device_index, xd->device_index); + + ethernet_delete_interface (vnm, xd->vlib_hw_if_index); + DBG_SOCK ("deleted (deactivated) vhost-user interface sw_if_index %d", sw_if_index); + + return rv; +} + +int dpdk_vhost_user_dump_ifs(vnet_main_t * vnm, vlib_main_t * vm, vhost_user_intf_details_t **out_vuids) +{ + int rv = 0; + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + dpdk_vu_intf_t * vui; + struct virtio_net * vhost_dev; + vhost_user_intf_details_t * r_vuids = NULL; + vhost_user_intf_details_t * vuid = NULL; + u32 * hw_if_indices = 0; + vnet_hw_interface_t * hi; + u8 *s = NULL; + int i; + + if (!out_vuids) + return -1; + + // using virtio vhost user? + if (dm->use_virtio_vhost) { + return vhost_user_dump_ifs(vnm, vm, out_vuids); + } + + vec_foreach (xd, dm->devices) { + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER && + xd->vu_intf->active) + vec_add1(hw_if_indices, xd->vlib_hw_if_index); + } + + for (i = 0; i < vec_len (hw_if_indices); i++) { + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_indices[i]); + if (!xd) { + clib_warning("invalid vhost-user interface hw_if_index %d", hw_if_indices[i]); + continue; + } + + vui = xd->vu_intf; + ASSERT(vui != NULL); + vhost_dev = &xd->vu_vhost_dev; + u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ? + vhost_dev->virtqueue[0]->vhost_hlen : 0); + + vec_add2(r_vuids, vuid, 1); + vuid->sw_if_index = xd->vlib_sw_if_index; + vuid->virtio_net_hdr_sz = virtio_net_hdr_sz; + vuid->features = vhost_dev->features; + vuid->is_server = vui->sock_is_server; + vuid->num_regions = (vhost_dev->mem != NULL ? 
vhost_dev->mem->nregions : 0); + vuid->sock_errno = vui->sock_errno; + strncpy((char *)vuid->sock_filename, (char *)vui->sock_filename, + ARRAY_LEN(vuid->sock_filename)-1); + + s = format (s, "%v%c", hi->name, 0); + + strncpy((char *)vuid->if_name, (char *)s, + ARRAY_LEN(vuid->if_name)-1); + _vec_len(s) = 0; + } + + vec_free (s); + vec_free (hw_if_indices); + + *out_vuids = r_vuids; + + return rv; +} + +/* + * Processing functions called from dpdk process fn + */ + +typedef struct { + struct sockaddr_un sun; + int sockfd; + unix_file_t template; + uword *event_data; +} dpdk_vu_process_state; + +void dpdk_vhost_user_process_init (void **ctx) +{ + dpdk_vu_process_state *state = clib_mem_alloc (sizeof(dpdk_vu_process_state)); + memset(state, 0, sizeof(*state)); + state->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + state->sun.sun_family = AF_UNIX; + state->template.read_function = dpdk_vhost_user_socket_read; + state->template.error_function = dpdk_vhost_user_socket_error; + state->event_data = 0; + *ctx = state; +} + +void dpdk_vhost_user_process_cleanup (void *ctx) +{ + clib_mem_free(ctx); +} + +uword dpdk_vhost_user_process_if (vlib_main_t *vm, dpdk_device_t *xd, void *ctx) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_vu_process_state *state = (dpdk_vu_process_state *)ctx; + dpdk_vu_intf_t *vui = xd->vu_intf; + + if (vui->sock_is_server || !vui->active) + return 0; + + if (vui->unix_fd == -1) { + /* try to connect */ + strncpy(state->sun.sun_path, (char *) vui->sock_filename, sizeof(state->sun.sun_path) - 1); + + if (connect(state->sockfd, (struct sockaddr *) &(state->sun), sizeof(struct sockaddr_un)) == 0) { + vui->sock_errno = 0; + vui->unix_fd = state->sockfd; + state->template.file_descriptor = state->sockfd; + vui->unix_file_index = unix_file_add (&unix_main, &(state->template)); + hash_set (dm->vu_sw_if_index_by_sock_fd, state->sockfd, xd->vlib_sw_if_index); + + state->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (state->sockfd < 0) + return -1; + } else { + vui->sock_errno = errno; + } + } else { + /* check if socket is alive */ + int error = 0; + socklen_t len = sizeof (error); + int retval = getsockopt(vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len); + + if (retval) + dpdk_vhost_user_if_disconnect(xd); + } + return 0; +} + +/* + * CLI functions + */ + +static clib_error_t * +dpdk_vhost_user_connect_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dpdk_main_t * dm = &dpdk_main; + unformat_input_t _line_input, * line_input = &_line_input; + u8 * sock_filename = NULL; + u32 sw_if_index; + u8 is_server = 0; + u64 feature_mask = (u64)~0; + u8 renumber = 0; + u32 custom_dev_instance = ~0; + + if (dm->use_virtio_vhost) { + return vhost_user_connect_command_fn(vm, input, cmd); + } + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "socket %s", &sock_filename)) + ; + else if (unformat (line_input, "server")) + is_server = 1; + else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask)) + ; + else if (unformat (line_input, "renumber %d", &custom_dev_instance)) { + renumber = 1; + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + vnet_main_t *vnm = vnet_get_main(); + if (sock_filename == NULL) + return clib_error_return (0, "missing socket file"); + + dpdk_vhost_user_create_if(vnm, vm, (char *)sock_filename, + is_server, &sw_if_index, feature_mask, + renumber, custom_dev_instance); + + vec_free(sock_filename); + return 0; +} + +VLIB_CLI_COMMAND (dpdk_vhost_user_connect_command, static) = { + .path = "create vhost-user", + .short_help = "create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [renumber <dev_instance>]", + .function = dpdk_vhost_user_connect_command_fn, +}; + +static clib_error_t * +dpdk_vhost_user_delete_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dpdk_main_t * dm = &dpdk_main; + clib_error_t * error = 0; + unformat_input_t _line_input, * line_input = &_line_input; + u32 sw_if_index = ~0; + + if (dm->use_virtio_vhost) { + return vhost_user_delete_command_fn(vm, input, cmd); + } + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (sw_if_index == ~0) { + error = clib_error_return (0, "invalid sw_if_index", + format_unformat_error, input); + return error; + } + + vnet_main_t *vnm = vnet_get_main(); + + dpdk_vhost_user_delete_if(vnm, vm, sw_if_index); + + return 0; +} + +VLIB_CLI_COMMAND (dpdk_vhost_user_delete_command, static) = { + .path = "delete vhost-user", + .short_help = "delete vhost-user sw_if_index <nn>", + .function = dpdk_vhost_user_delete_command_fn, +}; + +#define foreach_dpdk_vhost_feature \ + _ (VIRTIO_NET_F_MRG_RXBUF) \ + _ (VIRTIO_NET_F_CTRL_VQ) \ + _ (VIRTIO_NET_F_CTRL_RX) + +static clib_error_t * +show_dpdk_vhost_user_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + dpdk_main_t * dm = &dpdk_main; + vnet_main_t * vnm = vnet_get_main(); + dpdk_device_t * xd; + dpdk_vu_intf_t * vui; + struct virtio_net * vhost_dev; + u32 hw_if_index, * hw_if_indices = 0; + vnet_hw_interface_t * hi; + int i, j, q; + int show_descr = 0; + struct virtio_memory * mem; + struct feat_struct { u8 bit; char *str;}; + struct feat_struct *feat_entry; + + static struct feat_struct feat_array[] = { +#define _(f) { .str = #f, .bit = f, }, + foreach_dpdk_vhost_feature +#undef _ + { .str = NULL } + }; + + if (dm->use_virtio_vhost) { + return show_vhost_user_command_fn(vm, input, cmd); + } + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) { + vec_add1 (hw_if_indices, hw_if_index); + vlib_cli_output(vm, "add %d", hw_if_index); + } + else if (unformat (input, "descriptors") || unformat (input, "desc") ) + show_descr = 1; + 
else { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + if (vec_len (hw_if_indices) == 0) { + vec_foreach (xd, dm->devices) { + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER && xd->vu_intf->active) + vec_add1(hw_if_indices, xd->vlib_hw_if_index); + } + } + + vlib_cli_output (vm, "DPDK vhost-user interfaces"); + vlib_cli_output (vm, "Global:\n coalesce frames %d time %e\n\n", + dm->vhost_coalesce_frames, dm->vhost_coalesce_time); + + for (i = 0; i < vec_len (hw_if_indices); i++) { + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + + if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_indices[i]))) { + error = clib_error_return (0, "not dpdk vhost-user interface: '%s'", + hi->name); + goto done; + } + vui = xd->vu_intf; + vhost_dev = &xd->vu_vhost_dev; + mem = vhost_dev->mem; + u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ? + vhost_dev->virtqueue[0]->vhost_hlen : 0); + + vlib_cli_output (vm, "Interface: %s (ifindex %d)", + hi->name, hw_if_indices[i]); + + vlib_cli_output (vm, "virtio_net_hdr_sz %d\n features (0x%llx): \n", + virtio_net_hdr_sz, xd->vu_vhost_dev.features); + + feat_entry = (struct feat_struct *) &feat_array; + while(feat_entry->str) { + if (xd->vu_vhost_dev.features & (1 << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, feat_entry->bit); + feat_entry++; + } + + vlib_cli_output (vm, "\n"); + + vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n", + vui->sock_filename, vui->sock_is_server ? "server" : "client", + strerror(vui->sock_errno)); + + vlib_cli_output (vm, " Memory regions (total %d)\n", mem->nregions); + + if (mem->nregions){ + vlib_cli_output(vm, " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n"); + vlib_cli_output(vm, " ====== ===== ================== ================== ================== ================== ==================\n"); + } + for (j = 0; j < mem->nregions; j++) { + vlib_cli_output(vm, " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", j, + vui->region_fd[j], + mem->regions[j].guest_phys_address, + mem->regions[j].memory_size, + mem->regions[j].userspace_address, + mem->regions[j].address_offset, + vui->region_addr[j]); + } + for (q = 0; q < vui->num_vrings; q++) { + struct vhost_virtqueue *vq = vhost_dev->virtqueue[q]; + + vlib_cli_output(vm, "\n Virtqueue %d\n", q); + + vlib_cli_output(vm, " qsz %d last_used_idx %d last_used_idx_res %d\n", + vq->size, vq->last_used_idx, vq->last_used_idx_res); + + if (vq->avail && vq->used) + vlib_cli_output(vm, " avail.flags %x avail.idx %d used.flags %x used.idx %d\n", + vq->avail->flags, vq->avail->idx, vq->used->flags, vq->used->idx); + + vlib_cli_output(vm, " kickfd %d callfd %d errfd %d\n", + vui->vrings[q].kickfd, + vui->vrings[q].callfd, + vui->vrings[q].errfd); + + if (show_descr) { + vlib_cli_output(vm, "\n descriptor table:\n"); + vlib_cli_output(vm, " id addr len flags next user_addr\n"); + vlib_cli_output(vm, " ===== ================== ===== ====== ===== ==================\n"); + for(j = 0; j < vq->size; j++) { + vlib_cli_output(vm, " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", + j, + vq->desc[j].addr, + vq->desc[j].len, + vq->desc[j].flags, + vq->desc[j].next, + (u64) map_guest_mem(xd, vq->desc[j].addr));} + } + } + vlib_cli_output (vm, "\n"); + } +done: + vec_free (hw_if_indices); + return error; +} + +VLIB_CLI_COMMAND (show_vhost_user_command, static) = { + .path = "show vhost-user", + .short_help = "show vhost-user interface", + .function = 
show_dpdk_vhost_user_command_fn, +}; + diff --git a/vnet/vnet/devices/ssvm/node.c b/vnet/vnet/devices/ssvm/node.c new file mode 100644 index 00000000000..fe53d1199a2 --- /dev/null +++ b/vnet/vnet/devices/ssvm/node.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "ssvm_eth.h" + +vlib_node_registration_t ssvm_eth_input_node; + +typedef struct { + u32 next_index; + u32 sw_if_index; +} ssvm_eth_input_trace_t; + +/* packet trace format function */ +static u8 * format_ssvm_eth_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ssvm_eth_input_trace_t * t = va_arg (*args, ssvm_eth_input_trace_t *); + + s = format (s, "SSVM_ETH_INPUT: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + return s; +} + +vlib_node_registration_t ssvm_eth_input_node; + +#define foreach_ssvm_eth_input_error \ +_(NO_BUFFERS, "Rx packet drops (no buffers)") + +typedef enum { +#define _(sym,str) SSVM_ETH_INPUT_ERROR_##sym, + foreach_ssvm_eth_input_error +#undef _ + SSVM_ETH_INPUT_N_ERROR, +} ssvm_eth_input_error_t; + +static char * ssvm_eth_input_error_strings[] = { +#define _(sym,string) string, + foreach_ssvm_eth_input_error +#undef _ +}; + +typedef enum { + SSVM_ETH_INPUT_NEXT_DROP, + SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT, + SSVM_ETH_INPUT_NEXT_IP4_INPUT, + SSVM_ETH_INPUT_NEXT_IP6_INPUT, + SSVM_ETH_INPUT_NEXT_MPLS_INPUT, + SSVM_ETH_INPUT_N_NEXT, +} ssvm_eth_input_next_t; + +static inline uword +ssvm_eth_device_input (ssvm_eth_main_t * em, + ssvm_private_t * intfc, + vlib_node_runtime_t * node) +{ + ssvm_shared_header_t * sh = intfc->sh; + vlib_main_t * vm = em->vlib_main; + unix_shared_memory_queue_t * q; + ssvm_eth_queue_elt_t * elt, * elts; + u32 elt_index; + u32 my_pid = intfc->my_pid; + int rx_queue_index; + u32 n_to_alloc = VLIB_FRAME_SIZE * 2; + u32 n_allocated, n_present_in_cache; +#if DPDK > 0 + u32 next_index = DPDK_RX_NEXT_ETHERNET_INPUT; +#else + u32 next_index = 0; +#endif + vlib_buffer_free_list_t * fl; + u32 n_left_to_next, * to_next; + u32 next0; + u32 n_buffers; + u32 n_available; + u32 bi0, saved_bi0; + vlib_buffer_t * b0, * prev; + u32 saved_cache_size = 0; + ethernet_header_t * eh0; + u16 type0; + u32 n_rx_bytes = 0, l3_offset0; + u32 cpu_index = os_get_cpu_number(); + u32 trace_cnt __attribute__((unused)) = vlib_get_trace_count (vm, node); + volatile u32 * lock; + u32 * elt_indices; + + /* Either side down? buh-bye... */ + if ((u64)(sh->opaque [MASTER_ADMIN_STATE_INDEX]) == 0 || + (u64)(sh->opaque [SLAVE_ADMIN_STATE_INDEX]) == 0) + return 0; + + if (intfc->i_am_master) + q = (unix_shared_memory_queue_t *)(sh->opaque [TO_MASTER_Q_INDEX]); + else + q = (unix_shared_memory_queue_t *)(sh->opaque [TO_SLAVE_Q_INDEX]); + + /* Nothing to do? 
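+     (i.e. the shared-memory rx queue currently holds no element indices)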
*/ + if (q->cursize == 0) + return 0; + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + vec_reset_length (intfc->rx_queue); + + lock = (u32 *) q; + while (__sync_lock_test_and_set (lock, 1)) + ; + while (q->cursize > 0) + { + unix_shared_memory_queue_sub_raw (q, (u8 *)&elt_index); + ASSERT(elt_index < 2048); + vec_add1 (intfc->rx_queue, elt_index); + } + CLIB_MEMORY_BARRIER(); + *lock = 0; + + n_present_in_cache = vec_len (em->buffer_cache); + + if (vec_len (em->buffer_cache) < vec_len (intfc->rx_queue) * 2) + { + vec_validate (em->buffer_cache, + n_to_alloc + vec_len (em->buffer_cache) - 1); + n_allocated = + vlib_buffer_alloc (vm, &em->buffer_cache [n_present_in_cache], + n_to_alloc); + + n_present_in_cache += n_allocated; + _vec_len (em->buffer_cache) = n_present_in_cache; + } + + elts = (ssvm_eth_queue_elt_t *) (sh->opaque [CHUNK_POOL_INDEX]); + + n_buffers = vec_len (intfc->rx_queue); + rx_queue_index = 0; + + while (n_buffers > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_buffers > 0 && n_left_to_next > 0) + { + elt = elts + intfc->rx_queue[rx_queue_index]; + + saved_cache_size = n_present_in_cache; + if (PREDICT_FALSE(saved_cache_size == 0)) + { + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + goto out; + } + saved_bi0 = bi0 = em->buffer_cache [--n_present_in_cache]; + b0 = vlib_get_buffer (vm, bi0); + prev = 0; + + while (1) + { + vlib_buffer_init_for_free_list (b0, fl); + b0->clone_count = 0; + + b0->current_data = elt->current_data_hint; + b0->current_length = elt->length_this_buffer; + b0->total_length_not_including_first_buffer = + elt->total_length_not_including_first_buffer; + + memcpy (b0->data + b0->current_data, elt->data, + b0->current_length); + + if (PREDICT_FALSE(prev != 0)) + prev->next_buffer = bi0; + + if (PREDICT_FALSE(elt->flags & SSVM_BUFFER_NEXT_PRESENT)) + { + prev = b0; + if (PREDICT_FALSE(n_present_in_cache == 0)) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + goto out; + } + bi0 = em->buffer_cache [--n_present_in_cache]; + b0 = vlib_get_buffer (vm, bi0); + } + else + break; + } + + saved_cache_size = n_present_in_cache; + + to_next[0] = saved_bi0; + to_next++; + n_left_to_next--; + + b0 = vlib_get_buffer (vm, saved_bi0); + eh0 = vlib_buffer_get_current (b0); + + type0 = clib_net_to_host_u16 (eh0->type); + + next0 = SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT; + + if (type0 == ETHERNET_TYPE_IP4) + next0 = SSVM_ETH_INPUT_NEXT_IP4_INPUT; + else if (type0 == ETHERNET_TYPE_IP6) + next0 = SSVM_ETH_INPUT_NEXT_IP6_INPUT; + else if (type0 == ETHERNET_TYPE_MPLS_UNICAST) + next0 = SSVM_ETH_INPUT_NEXT_MPLS_INPUT; + + l3_offset0 = ((next0 == SSVM_ETH_INPUT_NEXT_IP4_INPUT || + next0 == SSVM_ETH_INPUT_NEXT_IP6_INPUT || + next0 == SSVM_ETH_INPUT_NEXT_MPLS_INPUT) ? + sizeof (ethernet_header_t) : 0); + + n_rx_bytes += b0->current_length + + b0->total_length_not_including_first_buffer; + + b0->current_data += l3_offset0; + b0->current_length -= l3_offset0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + + vnet_buffer(b0)->sw_if_index[VLIB_RX] = intfc->vlib_hw_if_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0; + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... 
+ */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b0); + + /* $$$$ tracing */ + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + n_buffers--; + rx_queue_index++; + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + out: + if (em->buffer_cache) + _vec_len (em->buffer_cache) = saved_cache_size; + else + ASSERT (saved_cache_size == 0); + + ssvm_lock (sh, my_pid, 2); + + ASSERT(vec_len(intfc->rx_queue) > 0); + + n_available = (u32)(u64)(sh->opaque[CHUNK_POOL_NFREE]); + elt_indices = (u32 *)(sh->opaque[CHUNK_POOL_FREELIST_INDEX]); + + memcpy (&elt_indices[n_available], intfc->rx_queue, + vec_len (intfc->rx_queue) * sizeof (u32)); + + n_available += vec_len (intfc->rx_queue); + sh->opaque[CHUNK_POOL_NFREE] = (void *) (u64) n_available; + + ssvm_unlock (sh); + + vlib_error_count (vm, node->node_index, SSVM_ETH_INPUT_ERROR_NO_BUFFERS, + n_buffers); + + vlib_increment_combined_counter + (vnet_get_main()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, cpu_index, + intfc->vlib_hw_if_index, + rx_queue_index, n_rx_bytes); + + return rx_queue_index; +} + +static uword +ssvm_eth_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ssvm_eth_main_t * em = &ssvm_eth_main; + ssvm_private_t * intfc; + uword n_rx_packets = 0; + + vec_foreach (intfc, em->intfcs) + { + n_rx_packets += ssvm_eth_device_input (em, intfc, node); + } + + return n_rx_packets; +} + +VLIB_REGISTER_NODE (ssvm_eth_input_node) = { + .function = ssvm_eth_input_node_fn, + .name = "ssvm_eth_input", + .vector_size = sizeof (u32), + .format_trace = format_ssvm_eth_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + + .n_errors = ARRAY_LEN(ssvm_eth_input_error_strings), + .error_strings = ssvm_eth_input_error_strings, + + .n_next_nodes = SSVM_ETH_INPUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SSVM_ETH_INPUT_NEXT_DROP] = "error-drop", + [SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input", + [SSVM_ETH_INPUT_NEXT_IP4_INPUT] = "ip4-input", + [SSVM_ETH_INPUT_NEXT_IP6_INPUT] = "ip6-input", + [SSVM_ETH_INPUT_NEXT_MPLS_INPUT] = "mpls-gre-input", + }, +}; + diff --git a/vnet/vnet/devices/ssvm/ssvm_eth.c b/vnet/vnet/devices/ssvm/ssvm_eth.c new file mode 100644 index 00000000000..aad63f02bba --- /dev/null +++ b/vnet/vnet/devices/ssvm/ssvm_eth.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "ssvm_eth.h" + +ssvm_eth_main_t ssvm_eth_main; + +#define foreach_ssvm_eth_tx_func_error \ +_(RING_FULL, "Tx packet drops (ring full)") \ +_(NO_BUFFERS, "Tx packet drops (no buffers)") \ +_(ADMIN_DOWN, "Tx packet drops (admin down)") + +typedef enum { +#define _(f,s) SSVM_ETH_TX_ERROR_##f, + foreach_ssvm_eth_tx_func_error +#undef _ + SSVM_ETH_TX_N_ERROR, +} ssvm_eth_tx_func_error_t; + +static u32 ssvm_eth_flag_change (vnet_main_t * vnm, + vnet_hw_interface_t * hi, + u32 flags); + +int ssvm_eth_create (ssvm_eth_main_t * em, u8 * name, int is_master) +{ + ssvm_private_t * intfc; + void * oldheap; + clib_error_t * e; + unix_shared_memory_queue_t * q; + ssvm_shared_header_t * sh; + ssvm_eth_queue_elt_t * elts; + u32 * elt_indices; + u8 enet_addr[6]; + int i, rv; + + vec_add2 (em->intfcs, intfc, 1); + + intfc->ssvm_size = em->segment_size; + intfc->i_am_master = 1; + intfc->name = name; + if (is_master == 0) + { + rv = ssvm_slave_init (intfc, 20 /* timeout in seconds */); + if (rv < 0) + return rv; + goto create_vnet_interface; + } + + intfc->requested_va = em->next_base_va; + em->next_base_va += em->segment_size; + rv = ssvm_master_init (intfc, intfc - em->intfcs /* master index */); + + if (rv < 0) + return rv; + + /* OK, segment created, set up queues and so forth. */ + + sh = intfc->sh; + oldheap = ssvm_push_heap (sh); + + q = unix_shared_memory_queue_init (em->queue_elts, sizeof (u32), + 0 /* consumer pid not interesting */, + 0 /* signal not sent */); + sh->opaque [TO_MASTER_Q_INDEX] = (void *)q; + q = unix_shared_memory_queue_init (em->queue_elts, sizeof (u32), + 0 /* consumer pid not interesting */, + 0 /* signal not sent */); + sh->opaque [TO_SLAVE_Q_INDEX] = (void *)q; + + /* + * Preallocate the requested number of buffer chunks + * There must be a better way to do this, etc. + * Add some slop to avoid pool reallocation, which will not go well + */ + elts = 0; + elt_indices = 0; + + vec_validate_aligned (elts, em->nbuffers - 1, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (elt_indices, em->nbuffers - 1, CLIB_CACHE_LINE_BYTES); + + for (i = 0; i < em->nbuffers; i++) + elt_indices[i] = i; + + sh->opaque [CHUNK_POOL_INDEX] = (void *) elts; + sh->opaque [CHUNK_POOL_FREELIST_INDEX] = (void *) elt_indices; + sh->opaque [CHUNK_POOL_NFREE] = (void *) em->nbuffers; + + ssvm_pop_heap (oldheap); + + create_vnet_interface: + + sh = intfc->sh; + + memset (enet_addr, 0, sizeof (enet_addr)); + enet_addr[0] = 2; + enet_addr[1] = 0xFE; + enet_addr[2] = is_master; + enet_addr[5] = sh->master_index; + + e = ethernet_register_interface + (em->vnet_main, ssvm_eth_device_class.index, + intfc - em->intfcs, + /* ethernet address */ enet_addr, + &intfc->vlib_hw_if_index, + ssvm_eth_flag_change); + + if (e) + { + clib_error_report (e); + /* $$$$ unmap offending region? */ + return VNET_API_ERROR_INVALID_INTERFACE; + } + + /* Declare link up */ + vnet_hw_interface_set_flags (em->vnet_main, intfc->vlib_hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + + /* Let the games begin... 
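+     (the master marks the shared segment ready so that a slave can attach)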
*/ + if (is_master) + sh->ready = 1; + return 0; +} + +static clib_error_t * +ssvm_config (vlib_main_t * vm, unformat_input_t * input) +{ + u8 * name; + int is_master = 1; + int i, rv; + ssvm_eth_main_t * em = &ssvm_eth_main; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "base-va %llx", &em->next_base_va)) + ; + else if (unformat (input, "segment-size %lld", &em->segment_size)) + em->segment_size = 1ULL << (max_log2 (em->segment_size)); + else if (unformat (input, "nbuffers %lld", &em->nbuffers)) + ; + else if (unformat (input, "queue-elts %lld", &em->queue_elts)) + ; + else if (unformat (input, "slave")) + is_master = 0; + else if (unformat (input, "%s", &name)) + vec_add1 (em->names, name); + else + break; + } + + /* No configured instances, we're done... */ + if (vec_len (em->names) == 0) + return 0; + + for (i = 0; i < vec_len (em->names); i++) + { + rv = ssvm_eth_create (em, em->names[i], is_master); + if (rv < 0) + return clib_error_return (0, "ssvm_eth_create '%s' failed, error %d", + em->names[i], rv); + } + + vlib_node_set_state (vm, ssvm_eth_input_node.index, VLIB_NODE_STATE_POLLING); + + return 0; +} + +VLIB_CONFIG_FUNCTION (ssvm_config, "ssvm_eth"); + + +static clib_error_t * ssvm_eth_init (vlib_main_t * vm) +{ + ssvm_eth_main_t * em = &ssvm_eth_main; + + if (((sizeof(ssvm_eth_queue_elt_t) / CLIB_CACHE_LINE_BYTES) + * CLIB_CACHE_LINE_BYTES) != sizeof(ssvm_eth_queue_elt_t)) + clib_warning ("ssvm_eth_queue_elt_t size %d not a multiple of %d", + sizeof(ssvm_eth_queue_elt_t), CLIB_CACHE_LINE_BYTES); + + em->vlib_main = vm; + em->vnet_main = vnet_get_main(); + em->elog_main = &vm->elog_main; + + /* default config param values... */ + + em->next_base_va = 0x600000000ULL; + /* + * Allocate 2 full superframes in each dir (256 x 2 x 2 x 2048 bytes), + * 2mb; double that so we have plenty of space... 
4mb + */ + em->segment_size = 8<<20; + em->nbuffers = 1024; + em->queue_elts = 512; + return 0; +} + +VLIB_INIT_FUNCTION (ssvm_eth_init); + +static char * ssvm_eth_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_ssvm_eth_tx_func_error +#undef _ +}; + +static u8 * format_ssvm_eth_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + + s = format (s, "ssvmEthernet%d", i); + return s; +} + +static u8 * format_ssvm_eth_device (u8 * s, va_list * args) +{ + s = format (s, "SSVM Ethernet"); + return s; +} + +static u8 * format_ssvm_eth_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + + +static uword +ssvm_eth_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + ssvm_eth_main_t * em = &ssvm_eth_main; + vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; + ssvm_private_t * intfc = vec_elt_at_index (em->intfcs, rd->dev_instance); + ssvm_shared_header_t * sh = intfc->sh; + unix_shared_memory_queue_t * q; + u32 * from; + u32 n_left; + ssvm_eth_queue_elt_t * elts, * elt, * prev_elt; + u32 my_pid = intfc->my_pid; + vlib_buffer_t * b0; + u32 bi0; + u32 size_this_buffer; + u32 chunks_this_buffer; + u8 i_am_master = intfc->i_am_master; + u32 elt_index; + int is_ring_full, interface_down; + int i; + volatile u32 *queue_lock; + u32 n_to_alloc = VLIB_FRAME_SIZE; + u32 n_allocated, n_present_in_cache, n_available; + u32 * elt_indices; + + if (i_am_master) + q = (unix_shared_memory_queue_t *)sh->opaque [TO_SLAVE_Q_INDEX]; + else + q = (unix_shared_memory_queue_t *)sh->opaque [TO_MASTER_Q_INDEX]; + + queue_lock = (u32 *) q; + + from = vlib_frame_vector_args (f); + n_left = f->n_vectors; + is_ring_full = 0; + interface_down = 0; + + n_present_in_cache = vec_len (em->chunk_cache); + + /* admin / link up/down check */ + if ((u64)(sh->opaque [MASTER_ADMIN_STATE_INDEX]) == 0 || + (u64)(sh->opaque [SLAVE_ADMIN_STATE_INDEX]) == 0) + { + interface_down = 1; + goto out; + } + + ssvm_lock (sh, my_pid, 1); + + elts = (ssvm_eth_queue_elt_t *) (sh->opaque [CHUNK_POOL_INDEX]); + elt_indices = (u32 *) (sh->opaque [CHUNK_POOL_FREELIST_INDEX]); + n_available = (u32) (u64) (sh->opaque [CHUNK_POOL_NFREE]); + + if (n_present_in_cache < n_left*2) + { + vec_validate (em->chunk_cache, + n_to_alloc + n_present_in_cache - 1); + + n_allocated = n_to_alloc < n_available ? n_to_alloc : n_available; + + if (PREDICT_TRUE(n_allocated > 0)) + { + memcpy (&em->chunk_cache[n_present_in_cache], + &elt_indices[n_available - n_allocated], + sizeof(u32) * n_allocated); + } + + n_present_in_cache += n_allocated; + n_available -= n_allocated; + sh->opaque [CHUNK_POOL_NFREE] = (void *) (u64) n_available; + _vec_len (em->chunk_cache) = n_present_in_cache; + } + + ssvm_unlock (sh); + + while (n_left) + { + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + size_this_buffer = vlib_buffer_length_in_chain (vm, b0); + chunks_this_buffer = (size_this_buffer + (SSVM_BUFFER_SIZE - 1)) + / SSVM_BUFFER_SIZE; + + /* If we're not going to be able to enqueue the buffer, tail drop. 
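+	 (the peer has not drained the shared queue; the remaining packets in
+	  the frame are dropped and counted as SSVM_ETH_TX_ERROR_RING_FULL below)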
*/ + if (q->cursize >= q->maxsize) + { + is_ring_full = 1; + break; + } + + prev_elt = 0; + elt_index = ~0; + for (i = 0; i < chunks_this_buffer; i++) + { + if (PREDICT_FALSE (n_present_in_cache == 0)) + goto out; + + elt_index = em->chunk_cache[--n_present_in_cache]; + elt = elts + elt_index; + + elt->type = SSVM_PACKET_TYPE; + elt->flags = 0; + elt->total_length_not_including_first_buffer = + b0->total_length_not_including_first_buffer; + elt->length_this_buffer = b0->current_length; + elt->current_data_hint = b0->current_data; + elt->owner = !i_am_master; + elt->tag = 1; + + memcpy (elt->data, b0->data + b0->current_data, b0->current_length); + + if (PREDICT_FALSE (prev_elt != 0)) + prev_elt->next_index = elt - elts; + + if (PREDICT_FALSE(i < (chunks_this_buffer-1))) + { + elt->flags = SSVM_BUFFER_NEXT_PRESENT; + ASSERT (b0->flags & VLIB_BUFFER_NEXT_PRESENT); + b0 = vlib_get_buffer (vm, b0->next_buffer); + } + prev_elt = elt; + } + + while (__sync_lock_test_and_set (queue_lock, 1)) + ; + + unix_shared_memory_queue_add_raw (q, (u8 *)&elt_index); + CLIB_MEMORY_BARRIER(); + *queue_lock = 0; + + from++; + n_left--; + } + + out: + if (PREDICT_FALSE(n_left)) + { + if (is_ring_full) + vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_RING_FULL, + n_left); + else if (interface_down) + vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_ADMIN_DOWN, + n_left); + else + vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_NO_BUFFERS, + n_left); + + vlib_buffer_free (vm, from, n_left); + } + else + vlib_buffer_free (vm, vlib_frame_vector_args (f), f->n_vectors); + + if (PREDICT_TRUE(vec_len(em->chunk_cache))) + _vec_len(em->chunk_cache) = n_present_in_cache; + + return f->n_vectors; +} + +static void ssvm_eth_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +ssvm_eth_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t * hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + ssvm_eth_main_t * em = &ssvm_eth_main; + ssvm_private_t * intfc = vec_elt_at_index (em->intfcs, hif->dev_instance); + ssvm_shared_header_t * sh; + + /* publish link-state in shared-memory, to discourage buffer-wasting */ + sh = intfc->sh; + if (intfc->i_am_master) + sh->opaque [MASTER_ADMIN_STATE_INDEX] = (void *) is_up; + else + sh->opaque [SLAVE_ADMIN_STATE_INDEX] = (void *) is_up; + + return 0; +} + +static clib_error_t * +ssvm_eth_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t * st, + int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void +ssvm_eth_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index, + u32 node_index) +{ + ssvm_eth_main_t * em = &ssvm_eth_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + ssvm_private_t * intfc = pool_elt_at_index (em->intfcs, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + intfc->per_interface_next_index = node_index; + return; + } + + intfc->per_interface_next_index = + vlib_node_add_next (em->vlib_main, ssvm_eth_input_node.index, node_index); +} + +static u32 ssvm_eth_flag_change (vnet_main_t * vnm, + vnet_hw_interface_t * hi, + u32 flags) +{ + /* nothing for now */ + return 0; +} + +VNET_DEVICE_CLASS (ssvm_eth_device_class) = { + .name = "ssvm-eth", + .tx_function = ssvm_eth_interface_tx, + 
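+  /* tx error counters; names come from foreach_ssvm_eth_tx_func_error */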
.tx_function_n_errors = SSVM_ETH_TX_N_ERROR, + .tx_function_error_strings = ssvm_eth_tx_func_error_strings, + .format_device_name = format_ssvm_eth_device_name, + .format_device = format_ssvm_eth_device, + .format_tx_trace = format_ssvm_eth_tx_trace, + .clear_counters = ssvm_eth_clear_hw_interface_counters, + .admin_up_down_function = ssvm_eth_interface_admin_up_down, + .subif_add_del_function = ssvm_eth_subif_add_del_function, + .rx_redirect_to_node = ssvm_eth_set_interface_next_node, + .no_flatten_output_chains = 1, +}; diff --git a/vnet/vnet/devices/ssvm/ssvm_eth.h b/vnet/vnet/devices/ssvm/ssvm_eth.h new file mode 100644 index 00000000000..1b077220305 --- /dev/null +++ b/vnet/vnet/devices/ssvm/ssvm_eth.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ssvm_eth_h__ +#define __included_ssvm_eth_h__ + +#include <vnet/vnet.h> + +#include <vppinfra/elog.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/hash.h> +#include <vppinfra/vec.h> +#include <vppinfra/elog.h> +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> +#include <vlibmemory/unix_shared_memory_queue.h> + +#include <ssvm.h> + +vnet_device_class_t ssvm_eth_device_class; +vlib_node_registration_t ssvm_eth_input_node; + +#define SSVM_BUFFER_SIZE \ + (VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES + VLIB_BUFFER_PRE_DATA_SIZE) +#define SSVM_PACKET_TYPE 1 + +typedef struct { + /* Type of queue element */ + u8 type; + u8 flags; +#define SSVM_BUFFER_NEXT_PRESENT (1<<0) + u8 owner; + u8 tag; + i16 current_data_hint; + u16 length_this_buffer; + u16 total_length_not_including_first_buffer; + u16 pad; + u32 next_index; + /* offset 16 */ + u8 data [SSVM_BUFFER_SIZE]; + /* pad to an even multiple of 64 octets */ + u8 pad2[CLIB_CACHE_LINE_BYTES - 16]; +} ssvm_eth_queue_elt_t; + +typedef struct { + /* vector of point-to-point connections */ + ssvm_private_t * intfcs; + + u32 * buffer_cache; + u32 * chunk_cache; + + /* Configurable parameters */ + /* base address for next placement */ + u64 next_base_va; + u64 segment_size; + u64 nbuffers; + u64 queue_elts; + + /* Segment names */ + u8 ** names; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + elog_main_t * elog_main; +} ssvm_eth_main_t; + +ssvm_eth_main_t ssvm_eth_main; + +typedef enum { + CHUNK_POOL_FREELIST_INDEX = 0, + CHUNK_POOL_INDEX, + CHUNK_POOL_NFREE, + TO_MASTER_Q_INDEX, + TO_SLAVE_Q_INDEX, + MASTER_ADMIN_STATE_INDEX, + SLAVE_ADMIN_STATE_INDEX, +} ssvm_eth_opaque_index_t; + +/* + * debug scaffolding. 
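+ * ssvm_eth_validate_freelists() walks each interface's chunk-pool freelist
+ * (only when CLIB_DEBUG > 0) and asserts that every element index is in range.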
+ */ +static inline void ssvm_eth_validate_freelists (int need_lock) +{ +#if CLIB_DEBUG > 0 + ssvm_eth_main_t * em = &ssvm_eth_main; + ssvm_private_t * intfc; + ssvm_shared_header_t * sh; + u32 * elt_indices; + u32 n_available; + int i; + + for (i = 0; i < vec_len (em->intfcs); i++) + { + intfc = em->intfcs + i; + sh = intfc->sh; + u32 my_pid = intfc->my_pid; + + if (need_lock) + ssvm_lock (sh, my_pid, 15); + + elt_indices = (u32 *) (sh->opaque [CHUNK_POOL_FREELIST_INDEX]); + n_available = (u32) (u64) (sh->opaque [CHUNK_POOL_NFREE]); + + for (i = 0; i < n_available; i++) + ASSERT (elt_indices[i] < 2048); + + if (need_lock) + ssvm_unlock (sh); + } +#endif +} + +#endif /* __included_ssvm_eth_h__ */ diff --git a/vnet/vnet/devices/virtio/vhost-user.c b/vnet/vnet/devices/virtio/vhost-user.c new file mode 100644 index 00000000000..4df025c21b6 --- /dev/null +++ b/vnet/vnet/devices/virtio/vhost-user.c @@ -0,0 +1,1957 @@ +/* + *------------------------------------------------------------------ + * vhost.c - vhost-user + * + * Copyright (c) 2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <fcntl.h> /* for open */ +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/uio.h> /* for iovec */ +#include <netinet/in.h> +#include <sys/vfs.h> + +#include <linux/if_arp.h> +#include <linux/if_tun.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <vnet/ip/ip.h> + +#include <vnet/ethernet/ethernet.h> + +#include <vnet/devices/virtio/vhost-user.h> + +#define VHOST_USER_DEBUG_SOCKET 0 +#define VHOST_USER_DEBUG_VQ 0 + +/* Set to get virtio_net_hdr in buffer pre-data + details will be shown in packet trace */ +#define VHOST_USER_COPY_TX_HDR 0 + +#if VHOST_USER_DEBUG_SOCKET == 1 +#define DBG_SOCK(args...) clib_warning(args); +#else +#define DBG_SOCK(args...) +#endif + +#if VHOST_USER_DEBUG_VQ == 1 +#define DBG_VQ(args...) clib_warning(args); +#else +#define DBG_VQ(args...) 
+#endif + +vlib_node_registration_t vhost_user_input_node; + +#define foreach_vhost_user_tx_func_error \ + _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \ + _(MMAP_FAIL, "mmap failure") + +typedef enum { +#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f, + foreach_vhost_user_tx_func_error +#undef _ + VHOST_USER_TX_FUNC_N_ERROR, +} vhost_user_tx_func_error_t; + +static char * vhost_user_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_vhost_user_tx_func_error +#undef _ +}; + +#define foreach_vhost_user_input_func_error \ + _(NO_ERROR, "no error") \ + _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") + +typedef enum { +#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f, + foreach_vhost_user_input_func_error +#undef _ + VHOST_USER_INPUT_FUNC_N_ERROR, +} vhost_user_input_func_error_t; + +static char * vhost_user_input_func_error_strings[] = { +#define _(n,s) s, + foreach_vhost_user_input_func_error +#undef _ +}; + +static vhost_user_main_t vhost_user_main = { + .mtu_bytes = 1518, +}; + +VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = { + .name = "vhost-user", +}; + +static u8 * format_vhost_user_interface_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u32 show_dev_instance = ~0; + vhost_user_main_t * vum = &vhost_user_main; + + if (i < vec_len (vum->show_dev_instance_by_real_dev_instance)) + show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i]; + + if (show_dev_instance != ~0) + i = show_dev_instance; + + s = format (s, "VirtualEthernet0/0/%d", i); + return s; +} + +static int vhost_user_name_renumber (vnet_hw_interface_t * hi, + u32 new_dev_instance) +{ + vhost_user_main_t * vum = &vhost_user_main; + + vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance, + hi->dev_instance, ~0); + + vum->show_dev_instance_by_real_dev_instance [hi->dev_instance] = + new_dev_instance; + + DBG_SOCK("renumbered vhost-user interface dev_instance %d to %d", + hi->dev_instance, new_dev_instance); + + return 0; +} + + +static inline void * map_guest_mem(vhost_user_intf_t * vui, u64 addr) +{ + int i; + for (i=0; i<vui->nregions; i++) { + if ((vui->regions[i].guest_phys_addr <= addr) && + ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) > addr)) { + return (void *) (vui->region_mmap_addr[i] + addr - vui->regions[i].guest_phys_addr); + } + } + DBG_VQ("failed to map guest mem addr %llx", addr); + return 0; +} + +static inline void * map_user_mem(vhost_user_intf_t * vui, u64 addr) +{ + int i; + for (i=0; i<vui->nregions; i++) { + if ((vui->regions[i].userspace_addr <= addr) && + ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) > addr)) { + return (void *) (vui->region_mmap_addr[i] + addr - vui->regions[i].userspace_addr); + } + } + return 0; +} + +static long get_huge_page_size(int fd) +{ + struct statfs s; + fstatfs(fd, &s); + return s.f_bsize; +} + +static void unmap_all_mem_regions(vhost_user_intf_t * vui) +{ + int i,r; + for (i=0; i<vui->nregions; i++) { + if (vui->region_mmap_addr[i] != (void *) -1) { + + long page_sz = get_huge_page_size(vui->region_mmap_fd[i]); + + ssize_t map_sz = (vui->regions[i].memory_size + + vui->regions[i].mmap_offset + page_sz) & ~(page_sz - 1); + + r = munmap(vui->region_mmap_addr[i] - vui->regions[i].mmap_offset, map_sz); + + DBG_SOCK("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i, + vui->region_mmap_addr[i], map_sz, page_sz); + + vui->region_mmap_addr[i]= (void *) -1; + + if (r == -1) { + clib_warning("failed to unmap memory region (errno %d)", 
errno); + } + close(vui->region_mmap_fd[i]); + } + } + vui->nregions = 0; +} + + +static clib_error_t * vhost_user_callfd_read_ready (unix_file_t * uf) +{ + __attribute__((unused)) int n; + u8 buff[8]; + n = read(uf->file_descriptor, ((char*)&buff), 8); + return 0; +} + +static inline void vhost_user_if_disconnect(vhost_user_intf_t * vui) +{ + vhost_user_main_t * vum = &vhost_user_main; + vnet_main_t * vnm = vnet_get_main(); + int q; + + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); + + if (vui->unix_file_index != ~0) { + unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index); + vui->unix_file_index = ~0; + } + + hash_unset(vum->vhost_user_interface_index_by_sock_fd, vui->unix_fd); + hash_unset(vum->vhost_user_interface_index_by_listener_fd, vui->unix_fd); + close(vui->unix_fd); + vui->unix_fd = -1; + vui->is_up = 0; + for (q = 0; q < vui->num_vrings; q++) { + vui->vrings[q].desc = NULL; + vui->vrings[q].avail = NULL; + vui->vrings[q].used = NULL; + } + + unmap_all_mem_regions(vui); + DBG_SOCK("interface ifindex %d disconnected", vui->sw_if_index); +} + +static clib_error_t * vhost_user_socket_read (unix_file_t * uf) +{ + int n, i; + int fd, number_of_fds = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + vhost_user_msg_t msg; + struct msghdr mh; + struct iovec iov[1]; + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui; + struct cmsghdr *cmsg; + uword * p; + u8 q; + unix_file_t template = {0}; + vnet_main_t * vnm = vnet_get_main(); + + p = hash_get (vum->vhost_user_interface_index_by_sock_fd, + uf->file_descriptor); + if (p == 0) { + DBG_SOCK ("FD %d doesn't belong to any interface", + uf->file_descriptor); + return 0; + } + else + vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]); + + char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))]; + + memset(&mh, 0, sizeof(mh)); + memset(control, 0, sizeof(control)); + + /* set the payload */ + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = VHOST_USER_MSG_HDR_SZ; + + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = control; + mh.msg_controllen = sizeof(control); + + n = recvmsg(uf->file_descriptor, &mh, 0); + + if (n != VHOST_USER_MSG_HDR_SZ) + goto close_socket; + + if (mh.msg_flags & MSG_CTRUNC) { + goto close_socket; + } + + cmsg = CMSG_FIRSTHDR(&mh); + + if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS) && + (cmsg->cmsg_len - CMSG_LEN(0) <= VHOST_MEMORY_MAX_NREGIONS * sizeof(int))) { + number_of_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + memcpy(fds, CMSG_DATA(cmsg), number_of_fds * sizeof(int)); + } + + /* version 1, no reply bit set*/ + if ((msg.flags & 7) != 1) { + DBG_SOCK("malformed message received. 
closing socket"); + goto close_socket; + } + + { + int rv __attribute__((unused)); + /* $$$$ pay attention to rv */ + rv = read(uf->file_descriptor, ((char*)&msg) + n, msg.size); + } + + switch (msg.request) { + case VHOST_USER_GET_FEATURES: + DBG_SOCK("if %d msg VHOST_USER_GET_FEATURES", + vui->hw_if_index); + + msg.flags |= 4; + msg.u64 = (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) | + (1 << FEAT_VIRTIO_F_ANY_LAYOUT); + msg.u64 &= vui->feature_mask; + + msg.size = sizeof(msg.u64); + break; + + case VHOST_USER_SET_FEATURES: + DBG_SOCK("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx", + vui->hw_if_index, msg.u64); + + vui->features = msg.u64; + if (vui->features & (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF)) + vui->virtio_net_hdr_sz = 12; + else + vui->virtio_net_hdr_sz = 10; + + vui->is_any_layout = (vui->features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 1 : 0; + + ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE); + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); + vui->is_up = 0; + + for (q = 0; q < 2; q++) { + vui->vrings[q].desc = 0; + vui->vrings[q].avail = 0; + vui->vrings[q].used = 0; + } + + DBG_SOCK("interface %d disconnected", vui->sw_if_index); + + break; + + case VHOST_USER_SET_MEM_TABLE: + DBG_SOCK("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d", + vui->hw_if_index, msg.memory.nregions); + + if ((msg.memory.nregions < 1) || + (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS)) { + + DBG_SOCK("number of mem regions must be between 1 and %i", + VHOST_MEMORY_MAX_NREGIONS); + + goto close_socket; + } + + if (msg.memory.nregions != number_of_fds) { + DBG_SOCK("each memory region must have FD"); + goto close_socket; + } + unmap_all_mem_regions(vui); + for(i=0; i < msg.memory.nregions; i++) { + memcpy(&(vui->regions[i]), &msg.memory.regions[i], + sizeof(vhost_user_memory_region_t)); + + long page_sz = get_huge_page_size(fds[i]); + + /* align size to 2M page */ + ssize_t map_sz = (vui->regions[i].memory_size + + vui->regions[i].mmap_offset + page_sz) & ~(page_sz - 1); + + vui->region_mmap_addr[i] = mmap(0, map_sz, PROT_READ | PROT_WRITE, + MAP_SHARED, fds[i], 0); + + DBG_SOCK("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx " + "page_sz 0x%x", i, map_sz, fds[i], vui->region_mmap_addr[i], page_sz); + + if (vui->region_mmap_addr[i] == MAP_FAILED) { + clib_warning("failed to map memory. 
errno is %d", errno); + goto close_socket; + } + vui->region_mmap_addr[i] += vui->regions[i].mmap_offset; + vui->region_mmap_fd[i] = fds[i]; + } + vui->nregions = msg.memory.nregions; + break; + + case VHOST_USER_SET_VRING_NUM: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d", + vui->hw_if_index, msg.state.index, msg.state.num); + + if ((msg.state.num > 32768) || /* maximum ring size is 32768 */ + (msg.state.num == 0) || /* it cannot be zero */ + (msg.state.num % 2)) /* must be power of 2 */ + goto close_socket; + vui->vrings[msg.state.index].qsz = msg.state.num; + break; + + case VHOST_USER_SET_VRING_ADDR: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_ADDR idx %d", + vui->hw_if_index, msg.state.index); + + vui->vrings[msg.state.index].desc = (vring_desc_t *) + map_user_mem(vui, msg.addr.desc_user_addr); + vui->vrings[msg.state.index].used = (vring_used_t *) + map_user_mem(vui, msg.addr.used_user_addr); + vui->vrings[msg.state.index].avail = (vring_avail_t *) + map_user_mem(vui, msg.addr.avail_user_addr); + + if ((vui->vrings[msg.state.index].desc == NULL) || + (vui->vrings[msg.state.index].used == NULL) || + (vui->vrings[msg.state.index].avail == NULL)) { + DBG_SOCK("failed to map user memory for hw_if_index %d", + vui->hw_if_index); + goto close_socket; + } + + vui->vrings[msg.state.index].last_used_idx = + vui->vrings[msg.state.index].used->idx; + + /* tell driver that we don't want interrupts */ + vui->vrings[msg.state.index].used->flags |= 1; + break; + + case VHOST_USER_SET_OWNER: + DBG_SOCK("if %d msg VHOST_USER_SET_OWNER", + vui->hw_if_index); + break; + + case VHOST_USER_RESET_OWNER: + DBG_SOCK("if %d msg VHOST_USER_RESET_OWNER", + vui->hw_if_index); + break; + + case VHOST_USER_SET_VRING_CALL: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_CALL u64 %d", + vui->hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + if (!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + goto close_socket; + + /* if there is old fd, delete it */ + if (vui->vrings[q].callfd) { + unix_file_t * uf = pool_elt_at_index (unix_main.file_pool, + vui->vrings[q].callfd_idx); + unix_file_del (&unix_main, uf); + } + vui->vrings[q].callfd = fds[0]; + template.read_function = vhost_user_callfd_read_ready; + template.file_descriptor = fds[0]; + vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template); + } + else + vui->vrings[q].callfd = -1; + break; + + case VHOST_USER_SET_VRING_KICK: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_KICK u64 %d", + vui->hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + if (!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + goto close_socket; + + vui->vrings[q].kickfd = fds[0]; + } + else + vui->vrings[q].kickfd = -1; + break; + + case VHOST_USER_SET_VRING_ERR: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_ERR u64 %d", + vui->hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + if (!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + goto close_socket; + + fd = fds[0]; + } + else + fd = -1; + + vui->vrings[q].errfd = fd; + break; + + case VHOST_USER_SET_VRING_BASE: + DBG_SOCK("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d", + vui->hw_if_index, msg.state.index, msg.state.num); + + vui->vrings[msg.state.index].last_avail_idx = msg.state.num; + break; + + case VHOST_USER_GET_VRING_BASE: + DBG_SOCK("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", + vui->hw_if_index, msg.state.index, msg.state.num); + + msg.state.num = vui->vrings[msg.state.index].last_used_idx; + msg.flags |= 4; + msg.size = sizeof(msg.state); + break; + + case VHOST_USER_NONE: + 
DBG_SOCK("if %d msg VHOST_USER_NONE", + vui->hw_if_index); + + break; + + case VHOST_USER_SET_LOG_BASE: + DBG_SOCK("if %d msg VHOST_USER_SET_LOG_BASE", + vui->hw_if_index); + + break; + + case VHOST_USER_SET_LOG_FD: + DBG_SOCK("if %d msg VHOST_USER_SET_LOG_FD", + vui->hw_if_index); + + break; + + default: + DBG_SOCK("unknown vhost-user message %d received. closing socket", + msg.request); + goto close_socket; + } + + /* if we have pointers to descriptor table, go up*/ + if (!vui->is_up && + vui->vrings[VHOST_NET_VRING_IDX_TX].desc && + vui->vrings[VHOST_NET_VRING_IDX_RX].desc) { + + DBG_SOCK("interface %d connected", vui->sw_if_index); + + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); + vui->is_up = 1; + + } + + /* if we need to reply */ + if (msg.flags & 4) + { + n = send(uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) + goto close_socket; + } + + return 0; + +close_socket: + vhost_user_if_disconnect(vui); + return 0; +} + +static clib_error_t * vhost_user_socket_error (unix_file_t * uf) +{ + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui; + uword * p; + + p = hash_get (vum->vhost_user_interface_index_by_sock_fd, + uf->file_descriptor); + if (p == 0) { + DBG_SOCK ("fd %d doesn't belong to any interface", + uf->file_descriptor); + return 0; + } + else + vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]); + + vhost_user_if_disconnect(vui); + return 0; +} + +static clib_error_t * vhost_user_socksvr_accept_ready (unix_file_t * uf) +{ + int client_fd, client_len; + struct sockaddr_un client; + unix_file_t template = {0}; + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui; + uword * p; + + p = hash_get (vum->vhost_user_interface_index_by_listener_fd, + uf->file_descriptor); + if (p == 0) { + DBG_SOCK ("fd %d doesn't belong to any interface", + uf->file_descriptor); + return 0; + } + else + vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]); + + client_len = sizeof(client); + client_fd = accept (uf->file_descriptor, + (struct sockaddr *)&client, + (socklen_t *)&client_len); + + if (client_fd < 0) + return clib_error_return_unix (0, "accept"); + + template.read_function = vhost_user_socket_read; + template.error_function = vhost_user_socket_error; + template.file_descriptor = client_fd; + vui->unix_file_index = unix_file_add (&unix_main, &template); + + vui->client_fd = client_fd; + hash_set (vum->vhost_user_interface_index_by_sock_fd, vui->client_fd, + vui - vum->vhost_user_interfaces); + + return 0; +} + +static clib_error_t * +vhost_user_init (vlib_main_t * vm) +{ + clib_error_t * error; + vhost_user_main_t * vum = &vhost_user_main; + vlib_thread_main_t * tm = vlib_get_thread_main(); + + error = vlib_call_init_function (vm, ip4_init); + if (error) + return error; + + vum->vhost_user_interface_index_by_listener_fd = hash_create (0, sizeof (uword)); + vum->vhost_user_interface_index_by_sock_fd = hash_create (0, sizeof (uword)); + vum->vhost_user_interface_index_by_sw_if_index = hash_create (0, sizeof (uword)); + vum->coalesce_frames = 32; + vum->coalesce_time = 1e-3; + + vec_validate_aligned (vum->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + return 0; +} + +VLIB_INIT_FUNCTION (vhost_user_init); + +static clib_error_t * +vhost_user_exit (vlib_main_t * vm) +{ + /* TODO cleanup */ + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit); + +enum { + VHOST_USER_RX_NEXT_ETHERNET_INPUT, + VHOST_USER_RX_NEXT_DROP, + 
VHOST_USER_RX_N_NEXT, +}; + + +typedef struct { + u16 virtqueue; + u16 device_index; +#if VHOST_USER_COPY_TX_HDR == 1 + virtio_net_hdr_t hdr; +#endif +} vhost_user_input_trace_t; + +static u8 * format_vhost_user_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main(); + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_input_trace_t * t = va_arg (*va, vhost_user_input_trace_t *); + vhost_user_intf_t * vui = vec_elt_at_index (vum->vhost_user_interfaces, + t->device_index); + + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, vui->sw_if_index); + +#if VHOST_USER_COPY_TX_HDR == 1 + uword indent = format_get_indent (s); +#endif + + s = format (s, "%U virtqueue %d", + format_vnet_sw_interface_name, vnm, sw, + t->virtqueue); + +#if VHOST_USER_COPY_TX_HDR == 1 + s = format (s, "\n%Uvirtio_net_hdr flags 0x%02x gso_type %u hdr_len %u", + format_white_space, indent, + t->hdr.flags, + t->hdr.gso_type, + t->hdr.hdr_len); +#endif + + return s; +} + +void vhost_user_rx_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vhost_user_intf_t *vui, + i16 virtqueue) +{ + u32 * b, n_left; + vhost_user_main_t * vum = &vhost_user_main; + + u32 next_index = VHOST_USER_RX_NEXT_ETHERNET_INPUT; + + n_left = vec_len(vui->d_trace_buffers); + b = vui->d_trace_buffers; + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + vhost_user_input_trace_t * t0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->virtqueue = virtqueue; + t0->device_index = vui - vum->vhost_user_interfaces; +#if VHOST_USER_COPY_TX_HDR == 1 + rte_memcpy(&t0->hdr, b0->pre_data, sizeof(virtio_net_hdr_t)); +#endif + + b+=1; + } +} + +static inline void vhost_user_send_call(vlib_main_t * vm, vhost_user_vring_t * vq) +{ + vhost_user_main_t * vum = &vhost_user_main; + u64 x = 1; + int rv __attribute__((unused)); + /* $$$$ pay attention to rv */ + rv = write(vq->callfd, &x, sizeof(x)); + vq->n_since_last_int = 0; + vq->int_deadline = vlib_time_now(vm) + vum->coalesce_time; +} + +static u32 vhost_user_if_input ( vlib_main_t * vm, + vhost_user_main_t * vum, + vhost_user_intf_t * vui, + vlib_node_runtime_t * node) +{ + vhost_user_vring_t * txvq = &vui->vrings[VHOST_NET_VRING_IDX_TX]; + vhost_user_vring_t * rxvq = &vui->vrings[VHOST_NET_VRING_IDX_RX]; + uword n_rx_packets = 0; + uword n_left; + u32 bi; + u32 n_left_to_next, * to_next; + u32 next_index = VHOST_USER_RX_NEXT_ETHERNET_INPUT; + uword n_rx_bytes = 0; + uword n_trace = vlib_get_trace_count (vm, node); + u16 qsz_mask; + f64 now = vlib_time_now (vm); + u32 cpu_index; + + vec_reset_length (vui->d_trace_buffers); + u32 free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX; + + /* no descriptor ptr - bail out */ + if (PREDICT_FALSE(!txvq->desc)) + return 0; + + /* do we have pending intterupts ? 
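+     (interrupts are coalesced: the guest's callfd is kicked here only if the
+      per-vring deadline derived from vum->coalesce_time has already passed)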
*/ + if ((txvq->n_since_last_int) && (txvq->int_deadline < now)) + vhost_user_send_call(vm, txvq); + + if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now)) + vhost_user_send_call(vm, rxvq); + + /* only bit 0 of avail.flags is used so we don't want to deal with this + interface if any other bit is set */ + if (PREDICT_FALSE(txvq->avail->flags & 0xFFFE)) + return 0; + + /* nothing to do */ + if (txvq->avail->idx == txvq->last_avail_idx) + return 0; + + cpu_index = os_get_cpu_number(); + + if (PREDICT_TRUE(txvq->avail->idx > txvq->last_avail_idx)) + n_left = txvq->avail->idx - txvq->last_avail_idx; + else /* wrapped */ + n_left = (u16) -1 - txvq->last_avail_idx + txvq->avail->idx; + + if (PREDICT_FALSE(!vui->admin_up)) { + /* if intf is admin down, just drop all packets waiting in the ring */ + txvq->last_avail_idx = txvq->last_used_idx = txvq->avail->idx; + CLIB_MEMORY_BARRIER(); + txvq->used->idx = txvq->last_used_idx; + vhost_user_send_call(vm, txvq); + + return 0; + } + + if (PREDICT_FALSE(n_left > txvq->qsz)) { + return 0; + } + + if (PREDICT_FALSE(n_left > VLIB_FRAME_SIZE)) + n_left = VLIB_FRAME_SIZE; + + /* Make sure we have some RX buffers. */ + { + uword l = vec_len (vum->rx_buffers[cpu_index]); + uword n_alloc; + + if (l < n_left) + { + if (! vum->rx_buffers[cpu_index]) { + vec_alloc (vum->rx_buffers[cpu_index], 2 * VLIB_FRAME_SIZE ); + } + + n_alloc = vlib_buffer_alloc_from_free_list + (vm, vum->rx_buffers[cpu_index] + l, 2 * VLIB_FRAME_SIZE - l, + free_list_index); + if (n_alloc == 0) + return 0; + _vec_len (vum->rx_buffers[cpu_index]) = l + n_alloc; + } + } + + qsz_mask = txvq->qsz - 1; + + while (n_left > 0) { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left > 0 && n_left_to_next > 0) { + vlib_buffer_t * b; + u16 desc_chain_head = txvq->avail->ring[txvq->last_avail_idx & qsz_mask]; + u16 desc_current = desc_chain_head; + uword i_rx = vec_len (vum->rx_buffers[cpu_index]) - 1; + + bi = vum->rx_buffers[cpu_index][i_rx]; + b = vlib_get_buffer (vm, bi); + + vlib_prefetch_buffer_with_index (vm, vum->rx_buffers[cpu_index][i_rx-1], STORE); + + uword offset; + if (PREDICT_TRUE(vui->is_any_layout)) + offset = vui->virtio_net_hdr_sz; + else if (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)) + /* WSA case, no ANYLAYOUT but single buffer */ + offset = vui->virtio_net_hdr_sz; + else + /* CSR case without ANYLAYOUT, skip 1st buffer */ + offset = txvq->desc[desc_current].len; + + uword ptr=0; + + while(1) { + void * buffer_addr = map_guest_mem(vui, txvq->desc[desc_current].addr); + CLIB_PREFETCH (&txvq->desc[txvq->desc[desc_current].next], sizeof (vring_desc_t), READ); + +#if VHOST_USER_COPY_TX_HDR == 1 + if (PREDICT_TRUE(offset)) { + rte_memcpy(b->pre_data, buffer_addr, sizeof(virtio_net_hdr_t)); /* 12 byte hdr is not used on tx */ + } +#endif + + if (txvq->desc[desc_current].len > offset) { + u16 len = txvq->desc[desc_current].len - offset; + + if (PREDICT_FALSE(len > VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES)) + len = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + + rte_memcpy(vlib_buffer_get_current (b) + ptr, + buffer_addr + offset, len); + } + ptr += txvq->desc[desc_current].len - offset; + offset = 0; + + /* if next flag is set, take next desc in the chain */ + if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT ) + desc_current = txvq->desc[desc_current].next; + else + break; + } + + txvq->last_avail_idx++; + + /* returning buffer */ + txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_chain_head; + txvq->used->ring[txvq->last_used_idx 
& qsz_mask].len = ptr + vui->virtio_net_hdr_sz; + + txvq->last_used_idx++; + + b->current_length = ptr; + + if(PREDICT_FALSE(b->current_length < 14)) { + vlib_error_count(vm, vhost_user_input_node.index, + VHOST_USER_INPUT_FUNC_ERROR_UNDERSIZED_FRAME, 1); + goto skip_frame; + } + + b->flags = 0; + b->current_data = 0; + b->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + n_rx_bytes += ptr; + _vec_len (vum->rx_buffers[cpu_index]) = i_rx; + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See .../vlib/vlib/buffer.h + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b); + + vnet_buffer (b)->sw_if_index[VLIB_RX] = vui->sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0; + b->error = node->errors[0]; + + to_next[0] = bi; + to_next++; + n_left_to_next--; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi, next_index); + + if (PREDICT_FALSE (n_trace > n_rx_packets)) + vec_add1 (vui->d_trace_buffers, bi); + + n_rx_packets++; +skip_frame: + n_left--; + } + + /* give buffers back to driver */ + CLIB_MEMORY_BARRIER(); + txvq->used->idx = txvq->last_used_idx; + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + if (PREDICT_FALSE (vec_len (vui->d_trace_buffers) > 0)) + { + vhost_user_rx_trace (vm, node, vui, VHOST_NET_VRING_IDX_TX); + vlib_set_trace_count (vm, node, n_trace - vec_len (vui->d_trace_buffers)); + } + + /* if no packets received we're done */ + if(!n_rx_packets) + return 0; + + /* interrupt (call) handling */ + if((txvq->callfd > 0) && !(txvq->avail->flags & 1)) { + txvq->n_since_last_int += n_rx_packets; + + if(txvq->n_since_last_int > vum->coalesce_frames) + vhost_user_send_call(vm, txvq); + } + + /* increase rx counters */ + vlib_increment_combined_counter + (vnet_main.interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number(), + vui->sw_if_index, + n_rx_packets, n_rx_bytes); + + return n_rx_packets; +} + +static uword +vhost_user_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + vhost_user_main_t * vum = &vhost_user_main; + dpdk_main_t * dm = &dpdk_main; + vhost_user_intf_t * vui; + uword n_rx_packets = 0; + u32 cpu_index = os_get_cpu_number(); + int i; + + for(i = 0; i < vec_len(vum->vhost_user_interfaces); i++ ) + { + vui = vec_elt_at_index(vum->vhost_user_interfaces, i); + if (!vui->is_up || + (i % dm->input_cpu_count) == (cpu_index - dm->input_cpu_first_index)) + continue; + n_rx_packets += + vhost_user_if_input (vm, vum, vui, node); + } + return n_rx_packets; +} + +VLIB_REGISTER_NODE (vhost_user_input_node) = { + .function = vhost_user_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "vhost-user-input", + + /* Will be enabled if/when hardware is detected. 
*/ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_vhost_user_input_trace, + + .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR, + .error_strings = vhost_user_input_func_error_strings, + + .n_next_nodes = VHOST_USER_RX_N_NEXT, + .next_nodes = { + [VHOST_USER_RX_NEXT_DROP] = "error-drop", + [VHOST_USER_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", + }, +}; + +static uword +vhost_user_intfc_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + u32 n_left = 0; + u16 used_index; + vhost_user_main_t * vum = &vhost_user_main; + uword n_packets = 0; + uword n_avail_desc; + vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; + vhost_user_intf_t * vui = vec_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance); + vhost_user_vring_t * rxvq = &vui->vrings[VHOST_NET_VRING_IDX_RX]; + u16 qsz_mask; + + if (PREDICT_FALSE(!vui->is_up)) + goto done2; + + if (PREDICT_FALSE(!rxvq->desc)) + goto done2; + + if (PREDICT_FALSE(vui->lockp != 0)) + { + while (__sync_lock_test_and_set (vui->lockp, 1)) + ; + } + + + /* only bit 0 of avail.flags is used so we don't want to deal with this + interface if any other bit is set */ + if (PREDICT_FALSE(rxvq->avail->flags & 0xFFFE)) + goto done2; + + if (PREDICT_FALSE((rxvq->avail->idx == rxvq->last_avail_idx) || + vui->sock_errno != 0)) { + vlib_simple_counter_main_t * cm; + vnet_main_t * vnm = vnet_get_main(); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + vlib_increment_simple_counter (cm, os_get_cpu_number(), + 0, frame->n_vectors); + + vlib_error_count (vm, node->node_index, + VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF, + frame->n_vectors); + goto done2; + } + + if (PREDICT_TRUE(rxvq->avail->idx > rxvq->last_avail_idx)) + n_avail_desc = rxvq->avail->idx - rxvq->last_avail_idx; + else /* wrapped */ + n_avail_desc = (u16) -1 - rxvq->last_avail_idx + rxvq->avail->idx; + + DBG_VQ("rxvq->avail->idx %d rxvq->last_avail_idx %d n_avail_desc %d", + rxvq->avail->idx, rxvq->last_avail_idx, n_avail_desc); + + n_left = n_packets = frame->n_vectors; + if (PREDICT_FALSE(n_packets > n_avail_desc)) { + vlib_simple_counter_main_t * cm; + vnet_main_t * vnm = vnet_get_main(); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + vlib_increment_simple_counter (cm, os_get_cpu_number(), + 0, frame->n_vectors); + + vlib_error_count (vm, node->node_index, + VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF, + n_packets - n_avail_desc); + n_left = n_packets = n_avail_desc; + } + + used_index = rxvq->used->idx; + qsz_mask = rxvq->qsz - 1; /* qsz is always power of 2 */ + + while (n_left >= 4) + { + vlib_buffer_t * b0, * b1; + u16 desc_chain_head0,desc_chain_head1; + u16 desc_current0,desc_current1; + uword offset0, offset1; + u16 bytes_left0, bytes_left1; + void *buffer_addr0, *buffer_addr1; + + vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD); + vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD); + + b0 = vlib_get_buffer (vm, buffers[0]); + b1 = vlib_get_buffer (vm, buffers[1]); + buffers+=2; + n_left-=2; + + desc_current0 = desc_chain_head0 = rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask]; + desc_current1 = desc_chain_head1 = rxvq->avail->ring[(rxvq->last_avail_idx+1) & qsz_mask]; + + offset0 = vui->virtio_net_hdr_sz; + + offset1 = vui->virtio_net_hdr_sz; + + bytes_left0 = b0->current_length; + bytes_left1 = b1->current_length; + + 
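+      /* Translate the two guest-physical descriptor addresses into our
+         own address space.  map_guest_mem() returns 0 when the address
+         does not fall inside a registered memory region; that case is
+         trapped just below and counted as an MMAP_FAIL tx error. */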
buffer_addr0 = map_guest_mem(vui, rxvq->desc[desc_current0].addr); + buffer_addr1 = map_guest_mem(vui, rxvq->desc[desc_current1].addr); + + if (PREDICT_FALSE(!buffer_addr0)) { + vlib_error_count (vm, node->node_index, VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); + goto done; + } + if (PREDICT_FALSE(!buffer_addr1)) { + vlib_error_count (vm, node->node_index, VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); + goto done; + } + + virtio_net_hdr_mrg_rxbuf_t * hdr0 = (virtio_net_hdr_mrg_rxbuf_t *) buffer_addr0; + virtio_net_hdr_mrg_rxbuf_t * hdr1 = (virtio_net_hdr_mrg_rxbuf_t *) buffer_addr1; + hdr0->hdr.flags = 0; + hdr1->hdr.flags = 0; + hdr0->hdr.gso_type = 0; + hdr1->hdr.gso_type = 0; + + if (vui->virtio_net_hdr_sz == 12) { + hdr0->num_buffers = 1; + hdr1->num_buffers = 1; + } + + buffer_addr0 += offset0; + buffer_addr1 += offset1; + + if (PREDICT_FALSE(!vui->is_any_layout && rxvq->desc[desc_current0].flags & VIRTQ_DESC_F_NEXT)) + rxvq->desc[desc_current0].len = vui->virtio_net_hdr_sz; + + if (PREDICT_FALSE(!vui->is_any_layout && rxvq->desc[desc_current1].flags & VIRTQ_DESC_F_NEXT)) + rxvq->desc[desc_current1].len = vui->virtio_net_hdr_sz; + + while(1) { + if (rxvq->desc[desc_current0].len - offset0 > 0 ) { + u16 bytes_to_copy = bytes_left0 > (rxvq->desc[desc_current0].len - offset0) ? (rxvq->desc[desc_current0].len - offset0) : bytes_left0; + rte_memcpy(buffer_addr0, vlib_buffer_get_current (b0) + b0->current_length - bytes_left0, bytes_to_copy); + bytes_left0 -= bytes_to_copy; + } + + if (rxvq->desc[desc_current0].flags & VIRTQ_DESC_F_NEXT ) { + offset0 = 0; + desc_current0 = rxvq->desc[desc_current1].next; + buffer_addr0 = map_guest_mem(vui, rxvq->desc[desc_current0].addr); + if (PREDICT_FALSE(!buffer_addr0)) { + vlib_error_count (vm, node->node_index, VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); + goto done; + } + } + else + break; + } + + while(1) { + if (rxvq->desc[desc_current1].len - offset1 > 0 ) { + u16 bytes_to_copy = bytes_left1 > (rxvq->desc[desc_current1].len - offset1) ? 
(rxvq->desc[desc_current1].len - offset1) : bytes_left1; + rte_memcpy(buffer_addr1, vlib_buffer_get_current (b1) + b1->current_length - bytes_left1, bytes_to_copy); + bytes_left1 -= bytes_to_copy; + } + + if (rxvq->desc[desc_current1].flags & VIRTQ_DESC_F_NEXT ) { + offset1 = 0; + desc_current1 = rxvq->desc[desc_current1].next; + buffer_addr1 = map_guest_mem(vui, rxvq->desc[desc_current1].addr); + if (PREDICT_FALSE(!buffer_addr1)) { + vlib_error_count (vm, node->node_index, VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); + goto done; + } + } + else + break; + } + + rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head0; + rxvq->used->ring[used_index & qsz_mask].len = b0->current_length + vui->virtio_net_hdr_sz; + used_index+=1; + rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head1; + rxvq->used->ring[used_index & qsz_mask].len = b1->current_length + vui->virtio_net_hdr_sz; + used_index+=1; + rxvq->last_avail_idx+=2; + } + + while (n_left > 0) + { + vlib_buffer_t * b0; + u16 desc_chain_head; + u16 desc_current; + void *buffer_addr; + + b0 = vlib_get_buffer (vm, buffers[0]); + buffers++; + n_left--; + + desc_chain_head = rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask]; + desc_current = desc_chain_head; + + uword offset = vui->virtio_net_hdr_sz; + + u16 bytes_left = b0->current_length; + buffer_addr = map_guest_mem(vui, rxvq->desc[desc_current].addr); + if (PREDICT_FALSE(!buffer_addr)) { + vlib_error_count (vm, node->node_index, VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); + goto done; + } + + virtio_net_hdr_mrg_rxbuf_t * hdr = (virtio_net_hdr_mrg_rxbuf_t *) buffer_addr; + hdr->hdr.flags = 0; + hdr->hdr.gso_type = 0; + + if (vui->virtio_net_hdr_sz == 12) { + hdr->num_buffers = 1; + } + + buffer_addr += offset; + + if (PREDICT_FALSE(!vui->is_any_layout && rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)) + rxvq->desc[desc_current].len = vui->virtio_net_hdr_sz; + + while(1) { + if (rxvq->desc[desc_current].len - offset > 0 ) { + u16 bytes_to_copy = bytes_left > (rxvq->desc[desc_current].len - offset) ? 
(rxvq->desc[desc_current].len - offset) : bytes_left; + rte_memcpy(buffer_addr, vlib_buffer_get_current (b0) + b0->current_length - bytes_left, bytes_to_copy); + bytes_left -= bytes_to_copy; + } + + if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT ) { + offset = 0; + desc_current = rxvq->desc[desc_current].next; + buffer_addr = map_guest_mem(vui, rxvq->desc[desc_current].addr); + if (PREDICT_FALSE(!buffer_addr)) { + vlib_error_count (vm, node->node_index, VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); + goto done; + } + } + else + break; + } + + rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head; + rxvq->used->ring[used_index & qsz_mask].len = b0->current_length + vui->virtio_net_hdr_sz; + + used_index++; + rxvq->last_avail_idx++; + } + +done: + CLIB_MEMORY_BARRIER(); + rxvq->used->idx = used_index; + + /* interrupt (call) handling */ + if((rxvq->callfd > 0) && !(rxvq->avail->flags & 1)) { + rxvq->n_since_last_int += n_packets - n_left; + + if(rxvq->n_since_last_int > vum->coalesce_frames) + vhost_user_send_call(vm, rxvq); + } + +done2: + + if (PREDICT_FALSE(vui->lockp != 0)) + *vui->lockp = 0; + + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + return frame->n_vectors; +} + +static clib_error_t * +vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t * hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui = vec_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance); + + vui->admin_up = is_up; + + if (is_up) + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + + return /* no error */ 0; +} + +VNET_DEVICE_CLASS (vhost_user_dev_class,static) = { + .name = "vhost-user", + .tx_function = vhost_user_intfc_tx, + .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR, + .tx_function_error_strings = vhost_user_tx_func_error_strings, + .format_device_name = format_vhost_user_interface_name, + .name_renumber = vhost_user_name_renumber, + .admin_up_down_function = vhost_user_interface_admin_up_down, +}; + +static uword +vhost_user_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui; + struct sockaddr_un sun; + int sockfd; + unix_file_t template = {0}; + f64 timeout = 3153600000.0 /* 100 years */; + uword *event_data = 0; + + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + sun.sun_family = AF_UNIX; + template.read_function = vhost_user_socket_read; + template.error_function = vhost_user_socket_error; + + + if (sockfd < 0) + return 0; + + while (1) { + vlib_process_wait_for_event_or_clock (vm, timeout); + vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + timeout = 3.0; + + vec_foreach (vui, vum->vhost_user_interfaces) { + + if (vui->sock_is_server || !vui->active) + continue; + + if (vui->unix_fd == -1) { + /* try to connect */ + + strncpy(sun.sun_path, (char *) vui->sock_filename, sizeof(sun.sun_path) - 1); + + if (connect(sockfd, (struct sockaddr *) &sun, sizeof(struct sockaddr_un)) == 0) { + vui->sock_errno = 0; + vui->unix_fd = sockfd; + template.file_descriptor = sockfd; + vui->unix_file_index = unix_file_add (&unix_main, &template); + hash_set (vum->vhost_user_interface_index_by_sock_fd, sockfd, vui - vum->vhost_user_interfaces); + + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) + return 0; + } + else { + 
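+          /* connect() failed: remember errno so it can be reported
+             (e.g. by "show vhost-user") and retry on the next pass of
+             the 3-second timeout loop */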
vui->sock_errno = errno; + } + } else { + /* check if socket is alive */ + int error = 0; + socklen_t len = sizeof (error); + int retval = getsockopt(vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len); + + if (retval) + vhost_user_if_disconnect(vui); + } + } + } + return 0; +} + +VLIB_REGISTER_NODE (vhost_user_process_node,static) = { + .function = vhost_user_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "vhost-user-process", +}; + +int vhost_user_delete_if(vnet_main_t * vnm, vlib_main_t * vm, + u32 sw_if_index) +{ + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui; + uword *p = NULL; + int rv = 0; + + p = hash_get (vum->vhost_user_interface_index_by_sw_if_index, + sw_if_index); + if (p == 0) { + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } else { + vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]); + } + + // interface is inactive + vui->active = 0; + // disconnect interface sockets + vhost_user_if_disconnect(vui); + // add to inactive interface list + vec_add1 (vum->vhost_user_inactive_interfaces_index, p[0]); + + // reset renumbered iface + if (p[0] < vec_len (vum->show_dev_instance_by_real_dev_instance)) + vum->show_dev_instance_by_real_dev_instance[p[0]] = ~0; + + ethernet_delete_interface (vnm, vui->hw_if_index); + DBG_SOCK ("deleted (deactivated) vhost-user interface instance %d", p[0]); + + return rv; +} + +// init server socket on specified sock_filename +static int vhost_user_init_server_sock(const char * sock_filename, int *sockfd) +{ + int rv = 0, len; + struct sockaddr_un un; + int fd; + /* create listening socket */ + fd = socket(AF_UNIX, SOCK_STREAM, 0); + + if (fd < 0) { + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + + un.sun_family = AF_UNIX; + strcpy((char *) un.sun_path, (char *) sock_filename); + + /* remove if exists */ + unlink( (char *) sock_filename); + + len = strlen((char *) un.sun_path) + strlen((char *) sock_filename); + + if (bind(fd, (struct sockaddr *) &un, len) == -1) { + rv = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + + if (listen(fd, 1) == -1) { + rv = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + + unix_file_t template = {0}; + template.read_function = vhost_user_socksvr_accept_ready; + template.file_descriptor = fd; + unix_file_add (&unix_main, &template); + *sockfd = fd; + return rv; + +error: + close(fd); + return rv; +} + +// get new vhost_user_intf_t from inactive interfaces or create new one +static vhost_user_intf_t *vhost_user_vui_new() +{ + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui = NULL; + int inactive_cnt = vec_len(vum->vhost_user_inactive_interfaces_index); + // if there are any inactive ifaces + if (inactive_cnt > 0) { + // take last + u32 vui_idx = vum->vhost_user_inactive_interfaces_index[inactive_cnt - 1]; + if (vec_len(vum->vhost_user_interfaces) > vui_idx) { + vui = vec_elt_at_index (vum->vhost_user_interfaces, vui_idx); + DBG_SOCK("reusing inactive vhost-user interface index %d", vui_idx); + } + // "remove" from inactive list + _vec_len(vum->vhost_user_inactive_interfaces_index) -= 1; + } + + // vui was not retrieved from inactive ifaces - create new + if (!vui) + vec_add2 (vum->vhost_user_interfaces, vui, 1); + return vui; +} + +// create ethernet interface for vhost user intf +static void vhost_user_create_ethernet(vnet_main_t * vnm, vlib_main_t * vm, + vhost_user_intf_t *vui) +{ + vhost_user_main_t * vum = &vhost_user_main; + u8 hwaddr[6]; + clib_error_t * error; + + /* create hw and sw interface */ + { + f64 now = vlib_time_now(vm); + u32 rnd; + rnd = 
(u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (hwaddr+2, &rnd, sizeof(rnd)); + hwaddr[0] = 2; + hwaddr[1] = 0xfe; + } + + error = ethernet_register_interface + (vnm, + vhost_user_dev_class.index, + vui - vum->vhost_user_interfaces /* device instance */, + hwaddr /* ethernet address */, + &vui->hw_if_index, + 0 /* flag change */); + if (error) + clib_error_report (error); +} + +// initialize vui with specified attributes +static void vhost_user_vui_init(vnet_main_t * vnm, + vhost_user_intf_t *vui, int sockfd, + const char * sock_filename, + u8 is_server, u64 feature_mask, + u32 * sw_if_index) +{ + vnet_sw_interface_t * sw; + sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); + vlib_thread_main_t * tm = vlib_get_thread_main(); + + vui->unix_fd = sockfd; + vui->sw_if_index = sw->sw_if_index; + vui->num_vrings = 2; + vui->sock_is_server = is_server; + strncpy(vui->sock_filename, sock_filename, ARRAY_LEN(vui->sock_filename)-1); + vui->sock_errno = 0; + vui->is_up = 0; + vui->feature_mask = feature_mask; + vui->active = 1; + vui->unix_file_index = ~0; + + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); + + if (sw_if_index) + *sw_if_index = vui->sw_if_index; + + if (tm->n_vlib_mains > 1) + { + vui->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) vui->lockp, 0, CLIB_CACHE_LINE_BYTES); + } +} + +// register vui and start polling on it +static void vhost_user_vui_register(vlib_main_t * vm, vhost_user_intf_t *vui) +{ + vhost_user_main_t * vum = &vhost_user_main; + dpdk_main_t * dm = &dpdk_main; + int cpu_index; + vlib_thread_main_t * tm = vlib_get_thread_main(); + + hash_set (vum->vhost_user_interface_index_by_listener_fd, vui->unix_fd, + vui - vum->vhost_user_interfaces); + hash_set (vum->vhost_user_interface_index_by_sw_if_index, vui->sw_if_index, + vui - vum->vhost_user_interfaces); + + /* start polling */ + cpu_index = dm->input_cpu_first_index + + (vui - vum->vhost_user_interfaces) % dm->input_cpu_count; + + if (tm->n_vlib_mains == 1) + vlib_node_set_state (vm, vhost_user_input_node.index, + VLIB_NODE_STATE_POLLING); + else if (!dm->have_io_threads) + vlib_node_set_state (vlib_mains[cpu_index], vhost_user_input_node.index, + VLIB_NODE_STATE_POLLING); + + /* tell process to start polling for sockets */ + vlib_process_signal_event(vm, vhost_user_process_node.index, 0, 0); +} + +int vhost_user_create_if(vnet_main_t * vnm, vlib_main_t * vm, + const char * sock_filename, + u8 is_server, + u32 * sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance) +{ + vhost_user_intf_t * vui = NULL; + dpdk_main_t * dm = &dpdk_main; + vlib_thread_main_t * tm = vlib_get_thread_main(); + u32 sw_if_idx = ~0; + int sockfd = -1; + int rv = 0; + + if (tm->n_vlib_mains > 1 && dm->have_io_threads) + { + clib_warning("vhost-user interfaces are not supported with multiple io threads"); + return -1; + } + + if (is_server) { + if ((rv = vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) { + return rv; + } + } + + vui = vhost_user_vui_new (); + ASSERT(vui != NULL); + + vhost_user_create_ethernet (vnm, vm, vui); + vhost_user_vui_init (vnm, vui, sockfd, sock_filename, is_server, + feature_mask, &sw_if_idx); + + if (renumber) { + vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); + } + + vhost_user_vui_register (vm, vui); + + if (sw_if_index) + *sw_if_index = sw_if_idx; + + return rv; +} + +int vhost_user_modify_if(vnet_main_t * vnm, vlib_main_t * vm, + const char * sock_filename, + u8 is_server, + u32 sw_if_index, + 
u64 feature_mask, + u8 renumber, u32 custom_dev_instance) +{ + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui = NULL; + u32 sw_if_idx = ~0; + int sockfd = -1; + int rv = 0; + uword *p = NULL; + + p = hash_get (vum->vhost_user_interface_index_by_sw_if_index, + sw_if_index); + if (p == 0) { + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } else { + vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]); + } + + // interface is inactive + vui->active = 0; + // disconnect interface sockets + vhost_user_if_disconnect(vui); + + if (is_server) { + if ((rv = vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) { + return rv; + } + } + + vhost_user_vui_init (vnm, vui, sockfd, sock_filename, is_server, + feature_mask, &sw_if_idx); + + if (renumber) { + vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); + } + + vhost_user_vui_register (vm, vui); + + return rv; +} + +clib_error_t * +vhost_user_connect_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + u8 * sock_filename = NULL; + u32 sw_if_index; + u8 is_server = 0; + u64 feature_mask = (u64)~0; + u8 renumber = 0; + u32 custom_dev_instance = ~0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "socket %s", &sock_filename)) + ; + else if (unformat (line_input, "server")) + is_server = 1; + else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask)) + ; + else if (unformat (line_input, "renumber %d", &custom_dev_instance)) { + renumber = 1; + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + vnet_main_t *vnm = vnet_get_main(); + + vhost_user_create_if(vnm, vm, (char *)sock_filename, + is_server, &sw_if_index, feature_mask, + renumber, custom_dev_instance); + + vec_free(sock_filename); + + return 0; +} + +clib_error_t * +vhost_user_delete_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + u32 sw_if_index = ~0; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + vnet_main_t *vnm = vnet_get_main(); + + vhost_user_delete_if(vnm, vm, sw_if_index); + + return 0; +} + +int vhost_user_dump_ifs(vnet_main_t * vnm, vlib_main_t * vm, vhost_user_intf_details_t **out_vuids) +{ + int rv = 0; + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui; + vhost_user_intf_details_t * r_vuids = NULL; + vhost_user_intf_details_t * vuid = NULL; + u32 * hw_if_indices = 0; + vnet_hw_interface_t * hi; + u8 *s = NULL; + int i; + + if (!out_vuids) + return -1; + + vec_foreach (vui, vum->vhost_user_interfaces) { + if (vui->active) + vec_add1(hw_if_indices, vui->hw_if_index); + } + + for (i = 0; i < vec_len (hw_if_indices); i++) { + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + vui = vec_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); + + vec_add2(r_vuids, vuid, 1); + vuid->sw_if_index = vui->sw_if_index; + vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz; + vuid->features = vui->features; + vuid->is_server = vui->sock_is_server; + vuid->num_regions = vui->nregions; + vuid->sock_errno = vui->sock_errno; + strncpy((char *)vuid->sock_filename, (char *)vui->sock_filename, + ARRAY_LEN(vuid->sock_filename)-1); + + s = format (s, "%v%c", hi->name, 0); + + strncpy((char *)vuid->if_name, (char *)s, + ARRAY_LEN(vuid->if_name)-1); + _vec_len(s) = 0; + } + + vec_free (s); + vec_free (hw_if_indices); + + *out_vuids = r_vuids; + + return rv; +} + +clib_error_t * +show_vhost_user_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + vnet_main_t * vnm = vnet_get_main(); + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui; + u32 hw_if_index, * hw_if_indices = 0; + vnet_hw_interface_t * hi; + int i, j, q; + int show_descr = 0; + struct feat_struct { u8 bit; char *str;}; + struct feat_struct *feat_entry; + + static struct feat_struct feat_array[] = { +#define _(s,b) { .str = #s, .bit = b, }, + foreach_virtio_net_feature +#undef _ + { .str = NULL } + }; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) { + vec_add1 (hw_if_indices, hw_if_index); + vlib_cli_output(vm, "add %d", hw_if_index); + } + else if (unformat (input, "descriptors") || unformat (input, "desc") ) + show_descr = 1; + else { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + if (vec_len (hw_if_indices) == 0) { + vec_foreach (vui, vum->vhost_user_interfaces) { + if (vui->active) + vec_add1(hw_if_indices, vui->hw_if_index); + } + } + vlib_cli_output (vm, "Virtio vhost-user interfaces"); + vlib_cli_output (vm, "Global:\n coalesce frames %d time %e\n\n", + vum->coalesce_frames, vum->coalesce_time); + + for (i = 0; i < vec_len (hw_if_indices); i++) { + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + vui = vec_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); + vlib_cli_output (vm, "Interface: %s (ifindex %d)", + hi->name, hw_if_indices[i]); + + vlib_cli_output (vm, "virtio_net_hdr_sz %d\n features (0x%llx): \n", + vui->virtio_net_hdr_sz, vui->features); + + feat_entry = (struct feat_struct *) &feat_array; + 
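+    /* walk the feature-name table generated from foreach_virtio_net_feature
+       and list every feature bit negotiated on this interface */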
while(feat_entry->str) { + if (vui->features & (1 << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, feat_entry->bit); + feat_entry++; + } + + vlib_cli_output (vm, "\n"); + + + vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n", + vui->sock_filename, vui->sock_is_server ? "server" : "client", + strerror(vui->sock_errno)); + + vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions); + + if (vui->nregions){ + vlib_cli_output(vm, " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n"); + vlib_cli_output(vm, " ====== ===== ================== ================== ================== ================== ==================\n"); + } + for (j = 0; j < vui->nregions; j++) { + vlib_cli_output(vm, " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", j, + vui->region_mmap_fd[j], + vui->regions[j].guest_phys_addr, + vui->regions[j].memory_size, + vui->regions[j].userspace_addr, + vui->regions[j].mmap_offset, + (u64) vui->region_mmap_addr[j]); + } + for (q = 0; q < vui->num_vrings; q++) { + vlib_cli_output(vm, "\n Virtqueue %d\n", q); + + vlib_cli_output(vm, " qsz %d last_avail_idx %d last_used_idx %d\n", + vui->vrings[q].qsz, + vui->vrings[q].last_avail_idx, + vui->vrings[q].last_used_idx); + + if (vui->vrings[q].avail && vui->vrings[q].used) + vlib_cli_output(vm, " avail.flags %x avail.idx %d used.flags %x used.idx %d\n", + vui->vrings[q].avail->flags, + vui->vrings[q].avail->idx, + vui->vrings[q].used->flags, + vui->vrings[q].used->idx); + + vlib_cli_output(vm, " kickfd %d callfd %d errfd %d\n", + vui->vrings[q].kickfd, + vui->vrings[q].callfd, + vui->vrings[q].errfd); + + if (show_descr) { + vlib_cli_output(vm, "\n descriptor table:\n"); + vlib_cli_output(vm, " id addr len flags next user_addr\n"); + vlib_cli_output(vm, " ===== ================== ===== ====== ===== ==================\n"); + for(j = 0; j < vui->vrings[q].qsz; j++) { + vlib_cli_output(vm, " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", + j, + vui->vrings[q].desc[j].addr, + vui->vrings[q].desc[j].len, + vui->vrings[q].desc[j].flags, + vui->vrings[q].desc[j].next, + (u64) map_guest_mem(vui, vui->vrings[q].desc[j].addr));} + } + } + vlib_cli_output (vm, "\n"); + } +done: + vec_free (hw_if_indices); + return error; +} + +static clib_error_t * +vhost_user_config (vlib_main_t * vm, unformat_input_t * input) +{ + vhost_user_main_t * vum = &vhost_user_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames)) + ; + else if (unformat (input, "coalesce-time %f", &vum->coalesce_time)) + ; + else if (unformat (input, "dont-dump-memory")) + vum->dont_dump_vhost_user_memory = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +/* vhost-user { ... } configuration. */ +VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user"); + +void +vhost_user_unmap_all (void) +{ + vhost_user_main_t * vum = &vhost_user_main; + vhost_user_intf_t * vui; + + if (vum->dont_dump_vhost_user_memory) + { + vec_foreach (vui, vum->vhost_user_interfaces) + { + unmap_all_mem_regions(vui); + } + } +} diff --git a/vnet/vnet/devices/virtio/vhost-user.h b/vnet/vnet/devices/virtio/vhost-user.h new file mode 100644 index 00000000000..3b57bcbfc16 --- /dev/null +++ b/vnet/vnet/devices/virtio/vhost-user.h @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __VIRTIO_VHOST_USER_H__ +#define __VIRTIO_VHOST_USER_H__ +/* vhost-user data structures */ + +#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_USER_MSG_HDR_SZ 12 +#define VHOST_VRING_MAX_SIZE 32768 +#define VHOST_NET_VRING_IDX_RX 0 +#define VHOST_NET_VRING_IDX_TX 1 +#define VHOST_NET_VRING_NUM 2 + +#define VIRTQ_DESC_F_NEXT 1 + +#define foreach_virtio_net_feature \ + _ (VIRTIO_NET_F_MRG_RXBUF, 15) \ + _ (VIRTIO_F_ANY_LAYOUT, 27) + +typedef enum { +#define _(f,n) FEAT_##f = (n), + foreach_virtio_net_feature +#undef _ +} virtio_net_feature_t; + +int vhost_user_create_if(vnet_main_t * vnm, vlib_main_t * vm, + const char * sock_filename, u8 is_server, + u32 * sw_if_index, u64 feature_mask, + u8 renumber, u32 custom_dev_instance); +int vhost_user_modify_if(vnet_main_t * vnm, vlib_main_t * vm, + const char * sock_filename, u8 is_server, + u32 sw_if_index, u64 feature_mask, + u8 renumber, u32 custom_dev_instance); +int vhost_user_delete_if(vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index); + +typedef struct vhost_user_memory_region { + u64 guest_phys_addr; + u64 memory_size; + u64 userspace_addr; + u64 mmap_offset; +} vhost_user_memory_region_t; + +typedef struct vhost_user_memory { + u32 nregions; + u32 padding; + vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]; +} vhost_user_memory_t; + +typedef struct vhost_vring_state { + unsigned int index, num; +} vhost_vring_state_t; + +typedef struct vhost_vring_addr { + unsigned int index, flags; + u64 desc_user_addr, used_user_addr, avail_user_addr, log_guest_addr; +} vhost_vring_addr_t; + +typedef enum vhost_user_req { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_MAX +} vhost_user_req_t; + +// vring_desc I/O buffer descriptor +typedef struct { + uint64_t addr; // packet data buffer address + uint32_t len; // packet data buffer size + uint16_t flags; // (see below) + uint16_t next; // optional index next descriptor in chain +} __attribute ((packed)) vring_desc_t; + +typedef struct { + uint16_t flags; + uint16_t idx; + uint16_t ring[VHOST_VRING_MAX_SIZE]; +} __attribute ((packed)) vring_avail_t; + +typedef struct { + uint16_t flags; + uint16_t idx; + struct /* vring_used_elem */ { + uint32_t id; + uint32_t len; + } ring[VHOST_VRING_MAX_SIZE]; +} __attribute ((packed)) vring_used_t; + +typedef struct { + u8 flags; + u8 gso_type; + u16 hdr_len; + u16 gso_size; + u16 csum_start; + u16 csum_offset; +} __attribute ((packed)) virtio_net_hdr_t; + +typedef struct { + virtio_net_hdr_t hdr; + u16 num_buffers; +} __attribute ((packed)) 
virtio_net_hdr_mrg_rxbuf_t; + +typedef struct vhost_user_msg { + vhost_user_req_t request; + u32 flags; + u32 size; + union { + u64 u64; + vhost_vring_state_t state; + vhost_vring_addr_t addr; + vhost_user_memory_t memory; + }; +} __attribute ((packed)) vhost_user_msg_t; + +typedef struct { + u32 qsz; + u16 last_avail_idx; + u16 last_used_idx; + vring_desc_t *desc; + vring_avail_t *avail; + vring_used_t *used; + int callfd; + int kickfd; + int errfd; + u32 callfd_idx; + u32 n_since_last_int; + f64 int_deadline; +} vhost_user_vring_t; + +typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); + volatile u32 * lockp; + u32 is_up; + u32 admin_up; + u32 unix_fd; + u32 unix_file_index; + u32 client_fd; + char sock_filename[256]; + int sock_errno; + u8 sock_is_server; + u32 hw_if_index, sw_if_index; + u8 active; + + u32 nregions; + u64 features; + u64 feature_mask; + u32 num_vrings; + vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]; + void * region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS]; + u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS]; + vhost_user_vring_t vrings[2]; + int virtio_net_hdr_sz; + int is_any_layout; + u32 * d_trace_buffers; +} vhost_user_intf_t; + +typedef struct { + u32 ** rx_buffers; + u32 mtu_bytes; + vhost_user_intf_t * vhost_user_interfaces; + u32 * vhost_user_inactive_interfaces_index; + uword * vhost_user_interface_index_by_listener_fd; + uword * vhost_user_interface_index_by_sock_fd; + uword * vhost_user_interface_index_by_sw_if_index; + u32 * show_dev_instance_by_real_dev_instance; + u32 coalesce_frames; + f64 coalesce_time; + int dont_dump_vhost_user_memory; +} vhost_user_main_t; + +typedef struct { + u8 if_name[64]; + u32 sw_if_index; + u32 virtio_net_hdr_sz; + u64 features; + u8 is_server; + u8 sock_filename[256]; + u32 num_regions; + int sock_errno; +} vhost_user_intf_details_t; + +int vhost_user_dump_ifs(vnet_main_t * vnm, vlib_main_t * vm, + vhost_user_intf_details_t **out_vuids); + +// CLI commands to be used from dpdk +clib_error_t * +vhost_user_connect_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd); +clib_error_t * +vhost_user_delete_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd); +clib_error_t * +show_vhost_user_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd); + +#endif diff --git a/vnet/vnet/dhcp/client.c b/vnet/vnet/dhcp/client.c new file mode 100644 index 00000000000..727b5165bad --- /dev/null +++ b/vnet/vnet/dhcp/client.c @@ -0,0 +1,960 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vlib/vlib.h> +#include <vnet/dhcp/proxy.h> + +dhcp_client_main_t dhcp_client_main; +static u8 * format_dhcp_client_state (u8 * s, va_list * va); +static vlib_node_registration_t dhcp_client_process_node; + +void __attribute__((weak)) +api_config_default_ip_route (u8 is_ipv6, u8 is_add, u32 vrf_id, + u32 sw_if_index, u8 *next_hop_addr) +{ + /* dummy function */ + return; +} + +static void +dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c) +{ + /* + * Install any/all info gleaned from dhcp, right here + */ + ip4_add_del_interface_address (dcm->vlib_main, c->sw_if_index, + (void *) &c->leased_address, + c->subnet_mask_width, 0 /*is_del*/); +} + +static void +dhcp_client_release_address (dhcp_client_main_t * dcm, dhcp_client_t * c) +{ + /* + * Remove any/all info gleaned from dhcp, right here. Caller(s) + * have not wiped out the info yet. + */ + + ip4_add_del_interface_address (dcm->vlib_main, c->sw_if_index, + (void *) &c->leased_address, + c->subnet_mask_width, 1 /*is_del*/); +} + +static void set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c) +{ + vnet_main_t * vnm = dcm->vnet_main; + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, c->sw_if_index); + vnet_hw_interface_class_t * hc = + vnet_get_hw_interface_class (vnm, hw->hw_class_index); + u32 n_rw; + + /* Acquire the L2 rewrite string for the indicated sw_if_index */ + vec_validate (c->l2_rewrite, 32); + ASSERT (hc->set_rewrite); + n_rw = hc->set_rewrite (dcm->vnet_main, c->sw_if_index, + VNET_L3_PACKET_TYPE_IP4, + 0 /* broadcast */, c->l2_rewrite, + vec_len(c->l2_rewrite)); + + _vec_len (c->l2_rewrite) = n_rw; +} + +/* + * dhcp_client_for_us - server-to-client callback. + * Called from proxy_node.c:dhcp_proxy_to_client_input(). + * This function first decides that the packet in question is + * actually for the dhcp client code in case we're also acting as + * a dhcp proxy. Ay caramba, what a folly! + */ +int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, + ip4_header_t * ip, + udp_header_t * udp, + dhcp_header_t * dhcp) +{ + dhcp_client_main_t * dcm = &dhcp_client_main; + vlib_main_t * vm = dcm->vlib_main; + dhcp_client_t * c; + uword * p; + f64 now = vlib_time_now (dcm->vlib_main); + u8 dhcp_message_type = 0; + dhcp_option_t * o; + + /* + * Doing dhcp client on this interface? + * Presumably we will always receive dhcp clnt for-us pkts on + * the interface that's asking for an address. + */ + p = hash_get (dcm->client_by_sw_if_index, + vnet_buffer(b)->sw_if_index [VLIB_RX]); + if (p == 0) + return 0; /* no */ + + c = pool_elt_at_index (dcm->clients, p[0]); + + /* Mixing dhcp relay and dhcp proxy? DGMS... 
*/ + if (c->state == DHCP_BOUND && c->retry_count == 0) + return 0; + + /* parse through the packet, learn what we can */ + if (dhcp->your_ip_address.as_u32) + c->leased_address.as_u32 = dhcp->your_ip_address.as_u32; + + o = (dhcp_option_t *) dhcp->options; + + while (o->option != 0xFF /* end of options */ && + (u8 *) o < (b->data + b->current_data + b->current_length)) + { + switch (o->option) + { + case 53: /* dhcp message type */ + dhcp_message_type = o->data[0]; + break; + + case 51: /* lease time */ + { + u32 lease_time_in_seconds = + clib_host_to_net_u32 (o->data_as_u32[0]); + c->lease_expires = now + (f64) lease_time_in_seconds; + c->lease_lifetime = lease_time_in_seconds; + /* Set a sensible default, in case we don't get opt 58 */ + c->lease_renewal_interval = lease_time_in_seconds / 2; + } + break; + + case 58: /* lease renew time in seconds */ + { + u32 lease_renew_time_in_seconds = + clib_host_to_net_u32 (o->data_as_u32[0]); + c->lease_renewal_interval = lease_renew_time_in_seconds; + } + break; + + case 54: /* dhcp server address */ + c->dhcp_server.as_u32 = o->data_as_u32[0]; + break; + + case 1: /* subnet mask */ + { + u32 subnet_mask = + clib_host_to_net_u32 (o->data_as_u32[0]); + c->subnet_mask_width = count_set_bits (subnet_mask); + } + break; + case 3: /* router address */ + { + u32 router_address = o->data_as_u32[0]; + c->router_address.as_u32 = router_address; + } + break; + + case 12: /* hostname */ + { + /* Replace the existing hostname if necessary */ + vec_free (c->hostname); + vec_validate (c->hostname, o->length - 1); + memcpy (c->hostname, o->data, o->length); + } + break; + + /* $$$$ Your message in this space, parse more options */ + default: + break; + } + + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + + switch (c->state) + { + case DHCP_DISCOVER: + if (dhcp_message_type != DHCP_PACKET_OFFER) + { + clib_warning ("sw_if_index %d state %U message type %d", + c->sw_if_index, format_dhcp_client_state, + c->state, dhcp_message_type); + c->next_transmit = now + 5.0; + break; + } + /* Received an offer, go send a request */ + c->state = DHCP_REQUEST; + c->retry_count = 0; + c->next_transmit = 0; /* send right now... */ + /* Poke the client process, which will send the request */ + vlib_process_signal_event (vm, dhcp_client_process_node.index, + EVENT_DHCP_CLIENT_WAKEUP, c - dcm->clients); + break; + + case DHCP_BOUND: + case DHCP_REQUEST: + if (dhcp_message_type != DHCP_PACKET_ACK) + { + clib_warning ("sw_if_index %d state %U message type %d", + c->sw_if_index, format_dhcp_client_state, + c->state, dhcp_message_type); + c->next_transmit = now + 5.0; + break; + } + /* OK, we own the address (etc), add to the routing table(s) */ + if (c->state == DHCP_REQUEST) + { + void (*fp)(u32, u32, u8 *, u8, u8 *, u8 *, u8 *) = c->event_callback; + + dhcp_client_acquire_address (dcm, c); + + /* + * Configure default IP route: + * - vrf_id is 0 by default. 
+ */ + if (c->router_address.as_u32) + api_config_default_ip_route (0 /* is_ipv6 */, + 1 /* is_add */, + 0 /* vrf_id */, + c->sw_if_index, + (u8 *)&c->router_address); + + /* + * Call the user's event callback to report DHCP information + */ + if (fp) + (*fp) (c->client_index, /* clinet index */ + c->pid, + c->hostname, + 0, /* is_ipv6 */ + (u8 *)&c->leased_address, /* host IP address */ + (u8 *)&c->router_address, /* router IP address */ + (u8 *)(c->l2_rewrite + 6));/* host MAC address */ + } + + c->state = DHCP_BOUND; + c->retry_count = 0; + c->next_transmit = now + (f64) c->lease_renewal_interval; + c->lease_expires = now + (f64) c->lease_lifetime; + break; + + default: + clib_warning ("client %d bogus state %d", + c - dcm->clients, c->state); + break; + } + + /* drop the pkt, return 1 */ + vlib_buffer_free (vm, &bi, 1); + return 1; +} + +static void +send_dhcp_pkt (dhcp_client_main_t * dcm, dhcp_client_t * c, + dhcp_packet_type_t type, int is_broadcast) +{ + vlib_main_t * vm = dcm->vlib_main; + vnet_main_t * vnm = dcm->vnet_main; + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, c->sw_if_index); + vnet_sw_interface_t * sup_sw + = vnet_get_sup_sw_interface (vnm, c->sw_if_index); + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, c->sw_if_index); + vlib_buffer_t * b; + u32 bi; + ip4_header_t * ip; + udp_header_t * udp; + dhcp_header_t * dhcp; + u32 * to_next; + vlib_frame_t * f; + dhcp_option_t * o; + u16 udp_length, ip_length; + + /* Interface(s) down? */ + if ((hw->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) == 0) + return; + if ((sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0) + return; + if ((sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0) + return; + + if (vlib_buffer_alloc (vm, &bi, 1) != 1) { + clib_warning ("buffer allocation failure"); + c->next_transmit = 0; + return; + } + + /* Build a dhcpv4 pkt from whole cloth */ + b = vlib_get_buffer (vm, bi); + + ASSERT (b->current_data == 0); + + vnet_buffer(b)->sw_if_index[VLIB_RX] = c->sw_if_index; + if (is_broadcast) + { + f = vlib_get_frame_to_node (vm, hw->output_node_index); + vnet_buffer(b)->sw_if_index[VLIB_TX] = c->sw_if_index; + memcpy (b->data, c->l2_rewrite, vec_len(c->l2_rewrite)); + ip = (void *) + (((u8 *)vlib_buffer_get_current (b)) + vec_len (c->l2_rewrite)); + } + else + { + f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); + vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */ + ip = vlib_buffer_get_current (b); + } + + /* Enqueue the packet right now */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + + if (is_broadcast) + vlib_put_frame_to_node (vm, hw->output_node_index, f); + else + vlib_put_frame_to_node (vm, ip4_lookup_node.index, f); + + udp = (udp_header_t *)(ip+1); + dhcp = (dhcp_header_t *)(udp+1); + + /* $$$ optimize, maybe */ + memset (ip, 0, sizeof (*ip) + sizeof (*udp) + sizeof (*dhcp)); + + ip->ip_version_and_header_length = 0x45; + ip->ttl = 128; + ip->protocol = IP_PROTOCOL_UDP; + + if (is_broadcast) + { + /* src = 0.0.0.0, dst = 255.255.255.255 */ + ip->dst_address.as_u32 = ~0; + } + else + { + /* Renewing an active lease, plain old ip4 src/dst */ + ip->src_address.as_u32 = c->leased_address.as_u32; + ip->dst_address.as_u32 = c->dhcp_server.as_u32; + } + + udp->src_port = clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client); + udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server); + + /* Send the interface MAC address */ + memcpy (dhcp->client_hardware_address, c->l2_rewrite + 6, 6); + + /* Lease renewal, set up 
client_ip_address */ + if (is_broadcast == 0) + dhcp->client_ip_address.as_u32 = c->leased_address.as_u32; + + dhcp->opcode = 1; /* request, all we send */ + dhcp->hardware_type = 1; /* ethernet */ + dhcp->hardware_address_length = 6; + dhcp->transaction_identifier = c->transaction_id; + dhcp->flags = clib_host_to_net_u16(is_broadcast ? DHCP_FLAG_BROADCAST : 0); + dhcp->magic_cookie.as_u32 = DHCP_MAGIC; + + o = (dhcp_option_t * )dhcp->options; + + /* Send option 53, the DHCP message type */ + o->option = 53; + o->length = 1; + o->data[0] = type; + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + + /* Send option 57, max msg length */ + if (0 /* not needed, apparently */) + { + o->option = 57; + o->length = 2; + { + u16 *o2 = (u16 *) o->data; + *o2 = clib_host_to_net_u16 (1152); + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + } + + /* + * If server ip address is available with non-zero value, + * option 54 (DHCP Server Identifier) is sent. + */ + if (c->dhcp_server.as_u32) + { + o->option = 54; + o->length = 4; + memcpy (o->data, &c->dhcp_server.as_u32, 4); + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + + /* send option 50, requested IP address */ + if (c->leased_address.as_u32) + { + o->option = 50; + o->length = 4; + memcpy (o->data, &c->leased_address.as_u32, 4); + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + + /* send option 12, host name */ + if (vec_len (c->hostname)) + { + o->option = 12; + o->length = vec_len (c->hostname); + memcpy (o->data, c->hostname, vec_len (c->hostname)); + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + + /* $$ maybe send the client s/w version if anyone cares */ + + /* + * send option 55, parameter request list + * The current list - see below, matches the Linux dhcp client's list + * Any specific dhcp server config and/or dhcp server may or may + * not yield specific options. + */ + o->option = 55; + o->length = vec_len (c->option_55_data); + memcpy (o->data, c->option_55_data, vec_len(c->option_55_data)); + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + + /* End of list */ + o->option = 0xff; + o->length = 0; + o++; + + b->current_length = ((u8 *)o) - b->data; + + /* fix ip length, checksum and udp length */ + ip_length = vlib_buffer_length_in_chain (vm, b); + if (is_broadcast) + ip_length -= vec_len (c->l2_rewrite); + + ip->length = clib_host_to_net_u16(ip_length); + ip->checksum = ip4_header_checksum(ip); + + udp_length = ip_length - (sizeof (*ip)); + udp->length = clib_host_to_net_u16 (udp_length); +} + +static int +dhcp_discover_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now) +{ + /* + * State machine "DISCOVER" state. Send a dhcp discover packet, + * eventually back off the retry rate. + */ + send_dhcp_pkt (dcm, c, DHCP_PACKET_DISCOVER, 1 /* is_broadcast */); + + c->retry_count++; + if (c->retry_count > 10) + c->next_transmit = now + 5.0; + else + c->next_transmit = now + 1.0; + return 0; +} + +static int +dhcp_request_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now) +{ + /* + * State machine "REQUEST" state. Send a dhcp request packet, + * eventually drop back to the discover state. 
+ */ + send_dhcp_pkt (dcm, c, DHCP_PACKET_REQUEST, 1 /* is_broadcast */); + + c->retry_count++; + if (c->retry_count > 7 /* lucky you */) + { + c->state = DHCP_DISCOVER; + c->next_transmit = now; + c->retry_count = 0; + return 1; + } + c->next_transmit = now + 1.0; + return 0; +} + +static int +dhcp_bound_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now) +{ + /* + * State machine "BOUND" state. Send a dhcp request packet, + * eventually, when the lease expires, forget the dhcp data + * and go back to the stone age. + */ + send_dhcp_pkt (dcm, c, DHCP_PACKET_REQUEST, 0 /* is_broadcast */); + + c->retry_count++; + if (c->retry_count > 10) + c->next_transmit = now + 5.0; + else + c->next_transmit = now + 1.0; + + if (now > c->lease_expires) + { + if (c->router_address.as_u32) + api_config_default_ip_route (0 /* is_ipv6 */, + 0 /* is_add */, + 0 /* vrf_id */, + c->sw_if_index, + (u8 *)&c->router_address); + + dhcp_client_release_address (dcm, c); + c->state = DHCP_DISCOVER; + c->next_transmit = now; + c->retry_count = 0; + /* Wipe out any memory of the address we had... */ + c->leased_address.as_u32 = 0; + c->subnet_mask_width = 0; + c->router_address.as_u32 = 0; + c->lease_renewal_interval = 0; + c->dhcp_server.as_u32 = 0; + return 1; + } + return 0; +} + +static f64 dhcp_client_sm (f64 now, f64 timeout, uword pool_index) +{ + dhcp_client_main_t * dcm = &dhcp_client_main; + dhcp_client_t * c; + + /* deleted, pooched, yadda yadda yadda */ + if (pool_is_free_index (dcm->clients, pool_index)) + return timeout; + + c = pool_elt_at_index (dcm->clients, pool_index); + + /* Time for us to do something with this client? */ + if (now < c->next_transmit) + return timeout; + + again: + switch (c->state) + { + case DHCP_DISCOVER: /* send a discover */ + if (dhcp_discover_state (dcm, c, now)) + goto again; + break; + + case DHCP_REQUEST: /* send a request */ + if (dhcp_request_state (dcm, c, now)) + goto again; + break; + + case DHCP_BOUND: /* bound, renew needed? 
*/ + if (dhcp_bound_state (dcm, c, now)) + goto again; + break; + + default: + clib_warning ("dhcp client %d bogus state %d", + c - dcm->clients, c->state); + break; + } + + if (c->next_transmit < now + timeout) + return c->next_transmit - now; + + return timeout; +} + +static uword +dhcp_client_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + f64 timeout = 100.0; + f64 now; + uword event_type; + uword * event_data = 0; + dhcp_client_main_t * dcm = &dhcp_client_main; + dhcp_client_t * c; + int i; + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, timeout); + + event_type = vlib_process_get_events (vm, &event_data); + + now = vlib_time_now (vm); + + switch (event_type) + { + case EVENT_DHCP_CLIENT_WAKEUP: + for (i = 0; i < vec_len (event_data); i++) + timeout = dhcp_client_sm (now, timeout, event_data[i]); + break; + + case ~0: + pool_foreach (c, dcm->clients, + ({ + timeout = dhcp_client_sm (now, timeout, + (uword)(c - dcm->clients)); + })); + if (pool_elts (dcm->clients) == 0) + timeout = 100.0; + break; + } + + vec_reset_length (event_data); + } + + /* NOTREACHED */ + return 0; +} + +VLIB_REGISTER_NODE (dhcp_client_process_node,static) = { + .function = dhcp_client_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dhcp-client-process", + .process_log2_n_stack_bytes = 16, +}; + +static u8 * format_dhcp_client_state (u8 * s, va_list * va) +{ + dhcp_client_state_t state = va_arg (*va, dhcp_client_state_t); + char * str = "BOGUS!"; + + switch (state) + { +#define _(a) \ + case a: \ + str = #a; \ + break; + foreach_dhcp_client_state; +#undef _ + default: + break; + } + + s = format (s, "%s", str); + return s; +} + +static u8 * format_dhcp_client (u8 * s, va_list * va) +{ + dhcp_client_main_t * dcm = va_arg (*va, dhcp_client_main_t *); + dhcp_client_t * c = va_arg (*va, dhcp_client_t *); + int verbose = va_arg (*va, int); + + s = format (s, "[%d] %U state %U ", c - dcm->clients, + format_vnet_sw_if_index_name, dcm->vnet_main, c->sw_if_index, + format_dhcp_client_state, c->state); + + if (c->leased_address.as_u32) + s = format (s, "addr %U/%d gw %U\n", + format_ip4_address, &c->leased_address, + c->subnet_mask_width, format_ip4_address, &c->router_address); + else + s = format (s, "no address\n"); + + if (verbose) + { + s = format (s, "retry count %d, next xmt %.2f", + c->retry_count, c->next_transmit); + } + return s; +} + +static clib_error_t * +show_dhcp_client_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dhcp_client_main_t * dcm = &dhcp_client_main; + dhcp_client_t * c; + int verbose = 0; + u32 sw_if_index = ~0; + uword * p; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "intfc %U", + unformat_vnet_sw_interface, dcm->vnet_main, + &sw_if_index)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + if (sw_if_index != ~0) + { + p = hash_get (dcm->client_by_sw_if_index, sw_if_index); + if (p == 0) + return clib_error_return (0, "dhcp client not configured"); + c = pool_elt_at_index (dcm->clients, sw_if_index); + vlib_cli_output (vm, "%U", format_dhcp_client, dcm, c, verbose); + return 0; + } + + pool_foreach (c, dcm->clients, + ({ + vlib_cli_output (vm, "%U", format_dhcp_client, dcm, c, verbose); + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_dhcp_client_command, static) = { + .path = "show dhcp client", + .short_help = "show dhcp client [intfc <intfc>][verbose]", + .function = show_dhcp_client_command_fn, +}; + + +int 
dhcp_client_add_del (dhcp_client_add_del_args_t * a) +{ + dhcp_client_main_t * dcm = &dhcp_client_main; + vlib_main_t * vm = dcm->vlib_main; + dhcp_client_t * c; + uword * p; + + p = hash_get (dcm->client_by_sw_if_index, a->sw_if_index); + + if ((p && a->is_add) || (!p && a->is_add == 0)) + return VNET_API_ERROR_INVALID_VALUE; + + if (a->is_add) + { + pool_get (dcm->clients, c); + memset (c, 0, sizeof (*c)); + c->state = DHCP_DISCOVER; + c->sw_if_index = a->sw_if_index; + c->client_index = a->client_index; + c->pid = a->pid; + c->event_callback = a->event_callback; + c->option_55_data = a->option_55_data; + c->hostname = a->hostname; + c->client_identifier = a->client_identifier; + do { + c->transaction_id = random_u32 (&dcm->seed); + } while (c->transaction_id == 0); + set_l2_rewrite (dcm, c); + hash_set (dcm->client_by_sw_if_index, a->sw_if_index, c - dcm->clients); + vlib_process_signal_event (vm, dhcp_client_process_node.index, + EVENT_DHCP_CLIENT_WAKEUP, c - dcm->clients); + } + else + { + c = pool_elt_at_index (dcm->clients, p[0]); + + if (c->router_address.as_u32) + api_config_default_ip_route (0 /* is_ipv6 */, + 0 /* is_add */, + 0 /* vrf_id */, + c->sw_if_index, + (u8 *)&c->router_address); + vec_free (c->option_55_data); + vec_free (c->hostname); + vec_free (c->client_identifier); + vec_free (c->l2_rewrite); + hash_unset (dcm->client_by_sw_if_index, c->sw_if_index); + pool_put (dcm->clients, c); + } + return 0; +} + +int +dhcp_client_config (vlib_main_t * vm, + u32 sw_if_index, + u8 * hostname, + u32 is_add, + u32 client_index, + void * event_callback, + u32 pid) +{ + dhcp_client_add_del_args_t _a, *a = &_a; + int rv; + + memset (a, 0, sizeof (*a)); + a->is_add = is_add; + a->sw_if_index = sw_if_index; + a->client_index = client_index; + a->pid = pid; + a->event_callback = event_callback; + vec_validate(a->hostname, strlen((char *)hostname) - 1); + strncpy((char *)a->hostname, (char *)hostname, vec_len(a->hostname)); + a->client_identifier = format (0, "vpe 1.0%c", 0); + /* + * Option 55 request list. These data precisely match + * the Ubuntu dhcp client. YMMV. 
+ */
+
+  /* Subnet Mask */
+  vec_add1 (a->option_55_data, 1);
+  /* Broadcast address */
+  vec_add1 (a->option_55_data, 28);
+  /* time offset */
+  vec_add1 (a->option_55_data, 2);
+  /* Router */
+  vec_add1 (a->option_55_data, 3);
+  /* Domain Name */
+  vec_add1 (a->option_55_data, 15);
+  /* DNS */
+  vec_add1 (a->option_55_data, 6);
+  /* Domain search */
+  vec_add1 (a->option_55_data, 119);
+  /* Host name */
+  vec_add1 (a->option_55_data, 12);
+  /* NetBIOS name server */
+  vec_add1 (a->option_55_data, 44);
+  /* NetBIOS Scope */
+  vec_add1 (a->option_55_data, 47);
+  /* MTU */
+  vec_add1 (a->option_55_data, 26);
+  /* Classless static route */
+  vec_add1 (a->option_55_data, 121);
+  /* NTP servers */
+  vec_add1 (a->option_55_data, 42);
+
+  rv = dhcp_client_add_del (a);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INVALID_VALUE:
+
+      vec_free (a->hostname);
+      vec_free (a->client_identifier);
+      vec_free (a->option_55_data);
+
+      if (is_add)
+        clib_warning ("dhcp client already enabled on intf_idx %d",
+                      sw_if_index);
+      else
+        clib_warning ("dhcp client not enabled on intf_idx %d",
+                      sw_if_index);
+      break;
+
+    default:
+      clib_warning ("dhcp_client_add_del returned %d", rv);
+    }
+
+  return rv;
+}
+
+static clib_error_t *
+dhcp_client_set_command_fn (vlib_main_t * vm,
+                            unformat_input_t * input,
+                            vlib_cli_command_t * cmd)
+{
+
+  dhcp_client_main_t * dcm = &dhcp_client_main;
+  u32 sw_if_index;
+  u8 * hostname = 0;
+  u8 sw_if_index_set = 0;
+  int is_add = 1;
+  dhcp_client_add_del_args_t _a, *a = &_a;
+  int rv;
+
+  while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "intfc %U",
+                    unformat_vnet_sw_interface, dcm->vnet_main,
+                    &sw_if_index))
+        sw_if_index_set = 1;
+      else if (unformat (input, "hostname %v", &hostname))
+        ;
+      else if (unformat (input, "del"))
+        is_add = 0;
+      else
+        break;
+    }
+
+  if (sw_if_index_set == 0)
+    return clib_error_return (0, "interface not specified");
+
+  memset (a, 0, sizeof (*a));
+  a->is_add = is_add;
+  a->sw_if_index = sw_if_index;
+  a->hostname = hostname;
+  a->client_identifier = format (0, "vpe 1.0%c", 0);
+
+  /*
+   * Option 55 request list. These data precisely match
+   * the Ubuntu dhcp client. YMMV.
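+   * (The request list below mirrors the one built in dhcp_client_config()
+   * above.)  Example CLI usage, with a purely illustrative interface name:
+   *   set dhcp client intfc GigabitEthernet0/8/0 hostname myhost
+   *   set dhcp client del intfc GigabitEthernet0/8/0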
+ */ + + /* Subnet Mask */ + vec_add1 (a->option_55_data, 1); + /* Broadcast address */ + vec_add1 (a->option_55_data, 28); + /* time offset */ + vec_add1 (a->option_55_data, 2); + /* Router */ + vec_add1 (a->option_55_data, 3); + /* Domain Name */ + vec_add1 (a->option_55_data, 15); + /* DNS */ + vec_add1 (a->option_55_data, 6); + /* Domain search */ + vec_add1 (a->option_55_data, 119); + /* Host name */ + vec_add1 (a->option_55_data, 12); + /* NetBIOS name server */ + vec_add1 (a->option_55_data, 44); + /* NetBIOS Scope */ + vec_add1 (a->option_55_data, 47); + /* MTU */ + vec_add1 (a->option_55_data, 26); + /* Classless static route */ + vec_add1 (a->option_55_data, 121); + /* NTP servers */ + vec_add1 (a->option_55_data, 42); + + rv = dhcp_client_add_del (a); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_VALUE: + + vec_free (a->hostname); + vec_free (a->client_identifier); + vec_free (a->option_55_data); + if (is_add) + return clib_error_return (0, "dhcp client already enabled on %U", + format_vnet_sw_if_index_name, + dcm->vnet_main, sw_if_index); + else + return clib_error_return (0, "dhcp client not enabled on %U", + format_vnet_sw_if_index_name, + dcm->vnet_main, sw_if_index); + break; + + default: + vlib_cli_output (vm, "dhcp_client_add_del returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_client_set_command, static) = { + .path = "set dhcp client", + .short_help = "set dhcp client [del] intfc <interface> [hostname <name>]", + .function = dhcp_client_set_command_fn, +}; + +static clib_error_t * +dhcp_client_init (vlib_main_t * vm) +{ + dhcp_client_main_t * dcm = &dhcp_client_main; + + dcm->vlib_main = vm; + dcm->vnet_main = vnet_get_main(); + dcm->seed = 0xdeaddabe; + return 0; +} + +VLIB_INIT_FUNCTION (dhcp_client_init); diff --git a/vnet/vnet/dhcp/client.h b/vnet/vnet/dhcp/client.h new file mode 100644 index 00000000000..d15e686b636 --- /dev/null +++ b/vnet/vnet/dhcp/client.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * client.h: dhcp client + */ + +#ifndef included_dhcp_client_h +#define included_dhcp_client_h + +#define foreach_dhcp_client_state \ +_(DHCP_DISCOVER) \ +_(DHCP_REQUEST) \ +_(DHCP_BOUND) + +typedef enum { +#define _(a) a, + foreach_dhcp_client_state +#undef _ +} dhcp_client_state_t; + +typedef struct { + dhcp_client_state_t state; + + /* the interface in question */ + u32 sw_if_index; + + /* State machine retry counter */ + u32 retry_count; + + /* Send next pkt at this time */ + f64 next_transmit; + f64 lease_expires; + + /* DHCP transaction ID, a random number */ + u32 transaction_id; + + /* leased address, other learned info DHCP */ + ip4_address_t leased_address; /* from your_ip_address field */ + ip4_address_t dhcp_server; + u32 subnet_mask_width; /* option 1 */ + ip4_address_t router_address; /* option 3 */ + u32 lease_renewal_interval; /* option 51 */ + u32 lease_lifetime; /* option 59 */ + + /* Requested data (option 55) */ + u8 * option_55_data; + + u8 * l2_rewrite; + + /* hostname and software client identifiers */ + u8 * hostname; + u8 * client_identifier; /* software version, e.g. vpe 1.0*/ + + /* Information used for event callback */ + u32 client_index; + u32 pid; + void * event_callback; +} dhcp_client_t; + +typedef struct { + /* DHCP client pool */ + dhcp_client_t * clients; + uword * client_by_sw_if_index; + u32 seed; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} dhcp_client_main_t; + +typedef struct { + int is_add; + u32 sw_if_index; + + /* vectors, consumed by dhcp client code */ + u8 * hostname; + u8 * client_identifier; + + /* Bytes containing requested option numbers */ + u8 * option_55_data; + + /* Information used for event callback */ + u32 client_index; + u32 pid; + void * event_callback; +} dhcp_client_add_del_args_t; + +dhcp_client_main_t dhcp_client_main; + +#define EVENT_DHCP_CLIENT_WAKEUP 1 + +int dhcp_client_for_us (u32 bi0, + vlib_buffer_t * b0, + ip4_header_t * ip0, + udp_header_t * u0, + dhcp_header_t * dh0); + +int dhcp_client_config (vlib_main_t * vm, + u32 sw_if_index, + u8 * hostname, + u32 is_add, + u32 client_index, + void *event_callback, + u32 pid); + +#endif /* included_dhcp_client_h */ diff --git a/vnet/vnet/dhcp/packet.h b/vnet/vnet/dhcp/packet.h new file mode 100644 index 00000000000..267a8eafc93 --- /dev/null +++ b/vnet/vnet/dhcp/packet.h @@ -0,0 +1,61 @@ +#ifndef included_vnet_dhcp_packet_h +#define included_vnet_dhcp_packet_h + +/* + * DHCP packet format + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/ip/ip4_packet.h> + +typedef struct { + u8 opcode; /* 1 = request, 2 = reply */ + u8 hardware_type; /* 1 = ethernet */ + u8 hardware_address_length; + u8 hops; + u32 transaction_identifier; + u16 seconds; + u16 flags; +#define DHCP_FLAG_BROADCAST (1<<15) + ip4_address_t client_ip_address; + ip4_address_t your_ip_address; /* use this one */ + ip4_address_t server_ip_address; + ip4_address_t gateway_ip_address; /* use option 3, not this one */ + u8 client_hardware_address[16]; + u8 server_name[64]; + u8 boot_filename[128]; + ip4_address_t magic_cookie; + u8 options[0]; +} dhcp_header_t; + +typedef struct { + u8 option; + u8 length; + union { + u8 data[0]; + u32 data_as_u32[0]; + }; +} __attribute__((packed)) dhcp_option_t; + +typedef enum { + DHCP_PACKET_DISCOVER=1, + DHCP_PACKET_OFFER, + DHCP_PACKET_REQUEST, + DHCP_PACKET_ACK=5, +} dhcp_packet_type_t; + +/* charming antique: 99.130.83.99 is the dhcp magic cookie */ +#define DHCP_MAGIC (clib_host_to_net_u32(0x63825363)) + +#endif /* included_vnet_dhcp_packet_h */ diff --git a/vnet/vnet/dhcp/proxy.h b/vnet/vnet/dhcp/proxy.h new file mode 100644 index 00000000000..e12c0d001b5 --- /dev/null +++ b/vnet/vnet/dhcp/proxy.h @@ -0,0 +1,92 @@ +/* + * proxy.h: dhcp proxy + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_dhcp_proxy_h +#define included_dhcp_proxy_h + +#include <vnet/vnet.h> +#include <vnet/dhcp/packet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip4.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/pg/pg.h> +#include <vnet/ip/format.h> +#include <vnet/ip/udp.h> +#include <vnet/dhcp/client.h> + +typedef enum { +#define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, +#include <vnet/dhcp/proxy_error.def> +#undef dhcp_proxy_error + DHCP_PROXY_N_ERROR, +} dhcp_proxy_error_t; + +typedef struct { + u32 oui; + u32 fib_id; +} vss_id; + +typedef union { + u8 as_u8[8]; + vss_id vpn_id; +} vss_info; + +typedef struct { + ip4_address_t dhcp_server; + ip4_address_t dhcp_src_address; + u32 insert_option_82; + u32 server_fib_index; + u32 valid; +} dhcp_server_t; + +typedef struct { + /* Pool of DHCP servers */ + dhcp_server_t * dhcp_servers; + + /* Pool of selected DHCP server. 
Zero is the default server */ + u32 * dhcp_server_index_by_rx_fib_index; + + /* to drop pkts in server-to-client direction */ + u32 error_drop_node_index; + + vss_info *opt82vss; + + /* hash lookup specific vrf_id -> option 82 vss suboption */ + uword * opt82vss_index_by_vrf_id; + + /* convenience */ + dhcp_client_main_t * dhcp_client_main; + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} dhcp_proxy_main_t; + +dhcp_proxy_main_t dhcp_proxy_main; + +int dhcp_proxy_set_server (ip4_address_t *addr, ip4_address_t *src_address, + u32 fib_id, int insert_option_82, int is_del); + +int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, + u32 rx_fib_id, + u32 server_fib_id, + int insert_option_82, int is_del); + +int dhcp_proxy_set_option82_vss(u32 vrf_id, + u32 oui, + u32 fib_id, + int is_del); +#endif /* included_dhcp_proxy_h */ diff --git a/vnet/vnet/dhcp/proxy_error.def b/vnet/vnet/dhcp/proxy_error.def new file mode 100644 index 00000000000..6aa06eb5120 --- /dev/null +++ b/vnet/vnet/dhcp/proxy_error.def @@ -0,0 +1,30 @@ +/* + * dhcp_proxy_error.def: dhcp proxy errors + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +dhcp_proxy_error (NONE, "no error") +dhcp_proxy_error (NO_SERVER, "no dhcp server configured") +dhcp_proxy_error (RELAY_TO_SERVER, "DHCP packets relayed to the server") +dhcp_proxy_error (RELAY_TO_CLIENT, "DHCP packets relayed to clients") +dhcp_proxy_error (OPTION_82_ERROR, "DHCP failed to insert option 82") +dhcp_proxy_error (NO_OPTION_82, "DHCP option 82 missing") +dhcp_proxy_error (BAD_OPTION_82, "Bad DHCP option 82 value") +dhcp_proxy_error (BAD_FIB_ID, "DHCP option 82 fib-id to fib-index map failure") +dhcp_proxy_error (NO_INTERFACE_ADDRESS, "DHCP no interface address") +dhcp_proxy_error (OPTION_82_VSS_NOT_PROCESSED, "DHCP VSS not processed by DHCP server") +dhcp_proxy_error (BAD_YIADDR, "DHCP packets with bad your_ip_address fields") +dhcp_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCP packets not from DHCP server or server FIB.") +dhcp_proxy_error (PKT_TOO_BIG, "DHCP packets which are too big.") diff --git a/vnet/vnet/dhcp/proxy_node.c b/vnet/vnet/dhcp/proxy_node.c new file mode 100644 index 00000000000..2f860226065 --- /dev/null +++ b/vnet/vnet/dhcp/proxy_node.c @@ -0,0 +1,1144 @@ +/* + * proxy_node.c: dhcp proxy node processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/dhcp/proxy.h> + +static char * dhcp_proxy_error_strings[] = { +#define dhcp_proxy_error(n,s) s, +#include "proxy_error.def" +#undef dhcp_proxy_error +}; + +#define foreach_dhcp_proxy_to_server_input_next \ + _ (DROP, "error-drop") \ + _ (LOOKUP, "ip4-lookup") \ + _ (SEND_TO_CLIENT, "dhcp-proxy-to-client") + +typedef enum { +#define _(s,n) DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s, + foreach_dhcp_proxy_to_server_input_next +#undef _ + DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, +} dhcp_proxy_to_server_input_next_t; + +typedef struct { + /* 0 => to server, 1 => to client */ + int which; + ip4_address_t trace_ip4_address; + u32 error; + u32 sw_if_index; + u32 original_sw_if_index; +} dhcp_proxy_trace_t; + +#define VPP_DHCP_OPTION82_SUB1_SIZE 6 +#define VPP_DHCP_OPTION82_SUB5_SIZE 6 +#define VPP_DHCP_OPTION82_VSS_SIZE 12 +#define VPP_DHCP_OPTION82_SIZE (VPP_DHCP_OPTION82_SUB1_SIZE + \ + VPP_DHCP_OPTION82_SUB5_SIZE + \ + VPP_DHCP_OPTION82_VSS_SIZE +3) + +vlib_node_registration_t dhcp_proxy_to_server_node; +vlib_node_registration_t dhcp_proxy_to_client_node; + +u8 * format_dhcp_proxy_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dhcp_proxy_trace_t * t = va_arg (*args, dhcp_proxy_trace_t *); + + if (t->which == 0) + s = format (s, "DHCP proxy: sent to server %U\n", + format_ip4_address, &t->trace_ip4_address, t->error); + else + s = format (s, "DHCP proxy: broadcast to client from %U\n", + format_ip4_address, &t->trace_ip4_address); + + if (t->error != (u32)~0) + s = format (s, " error: %s\n", dhcp_proxy_error_strings[t->error]); + + s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", + t->original_sw_if_index, t->sw_if_index); + + return s; +} + +u8 * format_dhcp_proxy_header_with_length (u8 * s, va_list * args) +{ + dhcp_header_t * h = va_arg (*args, dhcp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "dhcp header truncated"); + + s = format (s, "DHCP Proxy"); + + return s; +} + +/* get first interface address */ +static ip4_address_t * +ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip4_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip4_address_t * a = ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + return result; +} + +static uword +dhcp_proxy_to_server_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; + u32 pkts_no_interface_address=0; + u32 pkts_too_big=0; + ip4_main_t * im = &ip4_main; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0; + dhcp_header_t * h0; + ip4_header_t * ip0; + u32 next0; + u32 old0, new0; + ip_csum_t sum0; + u32 error0 = (u32) ~0; + u32 
sw_if_index = 0; + u32 original_sw_if_index = 0; + u8 *end = NULL; + u32 fib_index, server_index; + dhcp_server_t * server; + u32 rx_sw_if_index; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = vlib_buffer_get_current (b0); + + /* + * udp_local hands us the DHCP header, need udp hdr, + * ip hdr to relay to server + */ + vlib_buffer_advance (b0, -(sizeof(*u0))); + u0 = vlib_buffer_get_current (b0); + + /* This blows. Return traffic has src_port = 67, dst_port = 67 */ + if (u0->src_port == clib_net_to_host_u16(UDP_DST_PORT_dhcp_to_server)) + { + vlib_buffer_advance (b0, sizeof(*u0)); + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; + error0 = 0; + pkts_to_client++; + goto do_enqueue; + } + + rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index]; + + if (fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + server_index = dpm->dhcp_server_index_by_rx_fib_index[fib_index]; + else + server_index = 0; + + if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp_servers, + server_index))) + { + no_server: + error0 = DHCP_PROXY_ERROR_NO_SERVER; + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_server++; + goto do_trace; + } + + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + if (server->valid == 0) + goto no_server; + + vlib_buffer_advance (b0, -(sizeof(*ip0))); + ip0 = vlib_buffer_get_current (b0); + + /* disable UDP checksum */ + u0->checksum = 0; + sum0 = ip0->checksum; + old0 = ip0->dst_address.as_u32; + new0 = server->dhcp_server.as_u32; + ip0->dst_address.as_u32 = server->dhcp_server.as_u32; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + sum0 = ip0->checksum; + old0 = ip0->src_address.as_u32; + new0 = server->dhcp_src_address.as_u32; + ip0->src_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + /* Send to DHCP server via the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + server->server_fib_index; + + h0->gateway_ip_address.as_u32 = server->dhcp_src_address.as_u32; + pkts_to_server++; + + if (server->insert_option_82) + { + u32 fib_index, fib_id, opt82_fib_id=0, opt82_oui=0; + ip4_fib_t * fib; + dhcp_option_t *o = (dhcp_option_t *) h0->options; + u32 len = 0; + vlib_buffer_free_list_t *fl; + + fib_index = im->fib_index_by_sw_if_index + [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; + fib = vec_elt_at_index (im->fibs, fib_index); + fib_id = fib->table_id; + + end = b0->data + b0->current_data + b0->current_length; + /* TLVs are not performance-friendly... 
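+  * Walk the option TLVs until the 0xFF end-of-options marker (or the
+  * end of the buffer) so option 82 can be appended in place.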
*/ + while (o->option != 0xFF /* end of options */ && (u8 *)o < end) + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + + fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + // start write at (option*)o, some packets have padding + if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) + { + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_too_big++; + goto do_trace; + } + + if ((o->option == 0xFF) && ((u8 *)o <= end)) + { + vnet_main_t *vnm = vnet_get_main(); + u16 old_l0, new_l0; + ip4_address_t _ia0, * ia0 = &_ia0; + uword *p_vss; + vss_info *vss; + vnet_sw_interface_t *swif; + sw_if_index = 0; + original_sw_if_index = 0; + + original_sw_if_index = sw_if_index = + vnet_buffer(b0)->sw_if_index[VLIB_RX]; + swif = vnet_get_sw_interface (vnm, sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + p_vss = hash_get (dpm->opt82vss_index_by_vrf_id, + fib_id); + if (p_vss) + { + vss = pool_elt_at_index (dpm->opt82vss, p_vss[0]); + opt82_oui = vss->vpn_id.oui; + opt82_fib_id = vss->vpn_id.fib_id; + } + /* + * Get the first ip4 address on the [client-side] + * RX interface, if not unnumbered. otherwise use + * the loopback interface's ip address. + */ + ia0 = ip4_interface_first_address(&ip4_main, sw_if_index); + + if (ia0 == 0) + { + error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_interface_address++; + goto do_trace; + } + + /* Add option 82 */ + o->option = 82; /* option 82 */ + o->length = 12; /* 12 octets to follow */ + o->data[0] = 1; /* suboption 1, circuit ID (=FIB id) */ + o->data[1] = 4; /* length of suboption */ + o->data[2] = (original_sw_if_index >> 24) & 0xFF; + o->data[3] = (original_sw_if_index >> 16) & 0xFF; + o->data[4] = (original_sw_if_index >> 8) & 0xFF; + o->data[5] = (original_sw_if_index >> 0) & 0xFF; + o->data[6] = 5; /* suboption 5 (client RX intfc address) */ + o->data[7] = 4; /* length 4 */ + o->data[8] = ia0->as_u8[0]; + o->data[9] = ia0->as_u8[1]; + o->data[10] = ia0->as_u8[2]; + o->data[11] = ia0->as_u8[3]; + o->data[12] = 0xFF; + if (opt82_oui !=0 || opt82_fib_id != 0) + { + o->data[12] = 151; /* vss suboption */ + if (255 == opt82_fib_id) { + o->data[13] = 1; /* length */ + o->data[14] = 255; /* vss option type */ + o->data[15] = 152; /* vss control suboption */ + o->data[16] = 0; /* length */ + /* and a new "end-of-options" option (0xff) */ + o->data[17] = 0xFF; + o->length += 5; + } else { + o->data[13] = 8; /* length */ + o->data[14] = 1; /* vss option type */ + o->data[15] = (opt82_oui >> 16) & 0xff; + o->data[16] = (opt82_oui >> 8) & 0xff; + o->data[17] = (opt82_oui ) & 0xff; + o->data[18] = (opt82_fib_id >> 24) & 0xff; + o->data[19] = (opt82_fib_id >> 16) & 0xff; + o->data[20] = (opt82_fib_id >> 8) & 0xff; + o->data[21] = (opt82_fib_id) & 0xff; + o->data[22] = 152; /* vss control suboption */ + o->data[23] = 0; /* length */ + + /* and a new "end-of-options" option (0xff) */ + o->data[24] = 0xFF; + o->length += 12; + } + } + + len = o->length + 3; + b0->current_length += len; + /* Fix IP header length and checksum */ + old_l0 = ip0->length; + new_l0 = clib_net_to_host_u16 (old_l0); + new_l0 += len; + new_l0 = clib_host_to_net_u16 (new_l0); + ip0->length = new_l0; + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + /* Fix UDP length */ + new_l0 = clib_net_to_host_u16 (u0->length); + new_l0 += 
len; + u0->length = clib_host_to_net_u16 (new_l0); + } else { + vlib_node_increment_counter + (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_OPTION_82_ERROR, 1); + } + } + + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 0; /* to server */ + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP) + tr->trace_ip4_address.as_u32 = server->dhcp_server.as_u32; + } + + do_enqueue: + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_RELAY_TO_CLIENT, + pkts_to_client); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_RELAY_TO_SERVER, + pkts_to_server); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_NO_SERVER, + pkts_no_server); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS, + pkts_no_interface_address); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_PKT_TOO_BIG, + pkts_too_big); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcp_proxy_to_server_node) = { + .function = dhcp_proxy_to_server_input, + .name = "dhcp-proxy-to-server", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = DHCP_PROXY_N_ERROR, + .error_strings = dhcp_proxy_error_strings, + + .n_next_nodes = DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, + foreach_dhcp_proxy_to_server_input_next +#undef _ + }, + + .format_buffer = format_dhcp_proxy_header_with_length, + .format_trace = format_dhcp_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcp_proxy_header, +#endif +}; + +static uword +dhcp_proxy_to_client_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, * from; + ethernet_main_t *em = ethernet_get_main (vm); + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0; + dhcp_header_t * h0; + ip4_header_t * ip0 = 0; + ip4_address_t * ia0 = 0; + u32 old0, new0; + ip_csum_t sum0; + ethernet_interface_t *ei0; + ethernet_header_t *mac0; + vnet_hw_interface_t *hi0; + vlib_frame_t *f0; + u32 * to_next0; + u32 sw_if_index = ~0; + vnet_sw_interface_t *si0; + u32 error0 = (u32)~0; + vnet_sw_interface_t *swif; + u32 server_index; + u32 fib_index; + dhcp_server_t * server; + u32 original_sw_if_index = (u32) ~0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + /* + * udp_local hands us the DHCP header, need udp hdr, + * ip hdr to relay to client + */ + vlib_buffer_advance (b0, -(sizeof(*u0))); + u0 = vlib_buffer_get_current (b0); + + vlib_buffer_advance (b0, -(sizeof(*ip0))); + ip0 = vlib_buffer_get_current (b0); + + /* Consumed by dhcp client code? 
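+  * dhcp_client_for_us() returns nonzero when this reply belongs to a
+  * local DHCP client session; the packet is then handled by the client
+  * code and the proxy path moves on to the next buffer.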
*/ + if (dhcp_client_for_us (bi0, b0, ip0, u0, h0)) + continue; + + if (1 /* dpm->insert_option_82 */) + { + dhcp_option_t *o = (dhcp_option_t *) h0->options; + dhcp_option_t *sub; + + /* Parse through TLVs looking for option 82. + The circuit-ID is the FIB number we need + to track down the client-facing interface */ + + while (o->option != 0xFF /* end of options */ && + (u8 *) o < (b0->data + b0->current_data + b0->current_length)) + { + if (o->option == 82) + { + u32 vss_exist = 0; + u32 vss_ctrl = 0; + sub = (dhcp_option_t *) &o->data[0]; + while (sub->option != 0xFF /* end of options */ && + (u8 *) sub < (u8 *)(o + o->length)) { + /* If this is one of ours, it will have + total length 12, circuit-id suboption type, + and the sw_if_index */ + if (sub->option == 1 && sub->length == 4) + { + sw_if_index = (o->data[2] << 24) + | (o->data[3] << 16) + | (o->data[4] << 8) + | (o->data[5]); + } else if (sub->option == 151 && + sub->length == 7 && + sub->data[0] == 1) + vss_exist = 1; + else if (sub->option == 152 && sub->length == 0) + vss_ctrl = 1; + sub = (dhcp_option_t *) + (((uword) sub) + (sub->length + 2)); + } + if (vss_ctrl && vss_exist) + vlib_node_increment_counter + (vm, dhcp_proxy_to_client_node.index, + DHCP_PROXY_ERROR_OPTION_82_VSS_NOT_PROCESSED, 1); + + } + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + } + + if (sw_if_index == (u32)~0) + { + error0 = DHCP_PROXY_ERROR_NO_OPTION_82; + + drop_packet: + vlib_node_increment_counter (vm, dhcp_proxy_to_client_node.index, + error0, 1); + f0 = vlib_get_frame_to_node (vm, dpm->error_drop_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, dpm->error_drop_node_index, f0); + goto do_trace; + } + + + if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) + { + error0 = DHCP_PROXY_ERROR_BAD_OPTION_82; + goto drop_packet; + } + + fib_index = im->fib_index_by_sw_if_index [sw_if_index]; + + if (fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + server_index = dpm->dhcp_server_index_by_rx_fib_index[fib_index]; + else + server_index = 0; + + if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp_servers, + server_index))) + { + error0 = DHCP_PROXY_ERROR_BAD_OPTION_82; + goto drop_packet; + } + + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + if (server->valid == 0) + { + error0 = DHCP_PROXY_ERROR_NO_SERVER; + goto drop_packet; + } + + if (ip0->src_address.as_u32 != server->dhcp_server.as_u32) + { + error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; + goto drop_packet; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index; + + swif = vnet_get_sw_interface (vnm, sw_if_index); + original_sw_if_index = sw_if_index; + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + ia0 = ip4_interface_first_address (&ip4_main, sw_if_index); + if (ia0 == 0) + { + error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; + goto drop_packet; + } + + u0->checksum = 0; + u0->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcp_to_client); + sum0 = ip0->checksum; + old0 = ip0->dst_address.as_u32; + new0 = 0xFFFFFFFF; + ip0->dst_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + dst_address /* offset of changed member */); + ip0->checksum = ip_csum_fold (sum0); + + sum0 = ip0->checksum; + old0 = ip0->src_address.as_u32; + new0 = ia0->as_u32; + ip0->src_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + src_address 
/* offset of changed member */); + ip0->checksum = ip_csum_fold (sum0); + + vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); + si0 = vnet_get_sw_interface (vnm, original_sw_if_index); + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + vlib_buffer_advance (b0, -4 /* space for VLAN tag */); + + mac0 = vlib_buffer_get_current (b0); + + hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); + ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); + memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); + memset (mac0->dst_address, 0xff, sizeof (mac0->dst_address)); + mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? + clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x0800); + + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + { + u32 * vlan_tag = (u32 *)(mac0+1); + u32 tmp; + tmp = (si0->sub.id << 16) | 0x0800; + *vlan_tag = clib_host_to_net_u32 (tmp); + } + + /* $$$ This needs to be rewritten, for sure */ + f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, hi0->output_node_index, f0); + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 1; /* to client */ + tr->trace_ip4_address.as_u32 = ia0 ? ia0->as_u32 : 0; + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + } + } + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcp_proxy_to_client_node) = { + .function = dhcp_proxy_to_client_input, + .name = "dhcp-proxy-to-client", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = DHCP_PROXY_N_ERROR, + .error_strings = dhcp_proxy_error_strings, + .format_buffer = format_dhcp_proxy_header_with_length, + .format_trace = format_dhcp_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcp_proxy_header, +#endif +}; + +clib_error_t * dhcp_proxy_init (vlib_main_t * vm) +{ + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + vlib_node_t * error_drop_node; + dhcp_server_t * server; + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main(); + error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + dm->error_drop_node_index = error_drop_node->index; + + dm->opt82vss_index_by_vrf_id = hash_create (0, sizeof (uword)); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_client, + dhcp_proxy_to_client_node.index, 1 /* is_ip4 */); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_server, + dhcp_proxy_to_server_node.index, 1 /* is_ip4 */); + + /* Create the default server, don't mark it valid */ + pool_get (dm->dhcp_servers, server); + memset (server, 0, sizeof (*server)); + + return 0; +} + +VLIB_INIT_FUNCTION (dhcp_proxy_init); + +int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, + u32 rx_fib_id, + u32 server_fib_id, + int insert_option_82, int is_del) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + ip4_main_t * im = &ip4_main; + dhcp_server_t * server = 0; + ip4_fib_t *rx_fib, *server_fib; + u32 server_index = 0; + u32 rx_fib_index = 0; + + if (addr->as_u32 == 0) + return VNET_API_ERROR_INVALID_DST_ADDRESS; + + if (src_address->as_u32 == 0) + return VNET_API_ERROR_INVALID_SRC_ADDRESS; + + rx_fib = find_ip4_fib_by_table_index_or_id + (&ip4_main, rx_fib_id, IP4_ROUTE_FLAG_TABLE_ID); + + if (rx_fib == 0) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + + server_fib = find_ip4_fib_by_table_index_or_id + (&ip4_main, server_fib_id, 
IP4_ROUTE_FLAG_TABLE_ID); + + if (server_fib == 0) + return VNET_API_ERROR_NO_SUCH_FIB; + + if (rx_fib_id == 0) + { + server = pool_elt_at_index (dpm->dhcp_servers, 0); + + if (is_del) + { + memset (server, 0, sizeof (*server)); + return 0; + } + goto initialize_it; + } + + rx_fib_index = rx_fib - im->fibs; + + if (is_del) + { + if (rx_fib_index >= vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index]; + ASSERT(server_index > 0); + + /* Use the default server again. */ + dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = 0; + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + memset (server, 0, sizeof (*server)); + pool_put (dpm->dhcp_servers, server); + return 0; + } + + if (rx_fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + { + server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index]; + if (server_index != 0) + { + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + goto initialize_it; + } + } + + pool_get (dpm->dhcp_servers, server); + + initialize_it: + + server->dhcp_server.as_u32 = addr->as_u32; + server->server_fib_index = server_fib - im->fibs; + server->dhcp_src_address.as_u32 = src_address->as_u32; + server->insert_option_82 = insert_option_82; + server->valid = 1; + if (rx_fib_index) + { + vec_validate (dpm->dhcp_server_index_by_rx_fib_index, rx_fib_index); + dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = + server - dpm->dhcp_servers; + } + + return 0; +} + +/* Old API, manipulates the default server (only) */ +int dhcp_proxy_set_server (ip4_address_t *addr, ip4_address_t *src_address, + u32 fib_id, int insert_option_82, int is_del) +{ + return dhcp_proxy_set_server_2 (addr, src_address, 0 /* rx_fib_id */, + fib_id /* server_fib_id */, + insert_option_82, is_del); +} + + +static clib_error_t * +dhcp_proxy_set_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip4_address_t server_addr, src_addr; + u32 server_fib_id = 0, rx_fib_id = 0; + int is_del = 0; + int add_option_82 = 0; + int set_src = 0, set_server = 0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server %U", + unformat_ip4_address, &server_addr)) + set_server = 1; + else if (unformat (input, "server-fib-id %d", &server_fib_id)) + ; + else if (unformat (input, "rx-fib-id %d", &rx_fib_id)) + ; + else if (unformat(input, "src-address %U", + unformat_ip4_address, &src_addr)) + set_src = 1; + else if (unformat (input, "add-option-82") + || unformat (input, "insert-option-82")) + add_option_82 = 1; + else if (unformat (input, "delete") || + unformat (input, "del")) + is_del = 1; + else + break; + } + + if (is_del || (set_server && set_src)) + { + int rv; + + rv = dhcp_proxy_set_server_2 (&server_addr, &src_addr, rx_fib_id, + server_fib_id, add_option_82, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_INVALID_DST_ADDRESS: + return clib_error_return (0, "Invalid server address"); + + case VNET_API_ERROR_INVALID_SRC_ADDRESS: + return clib_error_return (0, "Invalid src address"); + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "No such rx fib id %d", rx_fib_id); + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "No such server fib id %d", + server_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return + (0, "Fib id %d: no per-fib DHCP server configured", rx_fib_id); + + default: + return 
clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = { + .path = "set dhcp proxy", + .short_help = "set dhcp proxy [del] server <ip-addr> src-address <ip-addr> [add-option-82] [server-fib-id <n>] [rx-fib-id <n>]", + .function = dhcp_proxy_set_command_fn, +}; + +u8 * format_dhcp_proxy_server (u8 * s, va_list * args) +{ + dhcp_proxy_main_t * dm = va_arg (*args, dhcp_proxy_main_t *); + dhcp_server_t * server = va_arg (*args, dhcp_server_t *); + u32 rx_fib_index = va_arg (*args, u32); + ip4_fib_t * rx_fib, * server_fib; + u32 server_fib_id = ~0, rx_fib_id = ~0; + + if (dm == 0) + { + s = format (s, "%=16s%=16s%=14s%=14s%=20s", "Server", "Src Address", + "Server FIB", "RX FIB", "Insert Option 82"); + return s; + } + + server_fib = find_ip4_fib_by_table_index_or_id + (&ip4_main, server->server_fib_index, IP4_ROUTE_FLAG_FIB_INDEX); + + if (server_fib) + server_fib_id = server_fib->table_id; + + rx_fib = find_ip4_fib_by_table_index_or_id + (&ip4_main, rx_fib_index, IP4_ROUTE_FLAG_FIB_INDEX); + + if (rx_fib) + rx_fib_id = rx_fib->table_id; + + s = format (s, "%=16U%=16U%=14u%=14u%=20s", + format_ip4_address, &server->dhcp_server, + format_ip4_address, &server->dhcp_src_address, + server_fib_id, rx_fib_id, + server->insert_option_82 ? "yes" : "no"); + return s; +} + +static clib_error_t * +dhcp_proxy_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + ip4_main_t * im = &ip4_main; + dhcp_server_t * server; + u32 server_index; + int i; + + vlib_cli_output (vm, "%U", format_dhcp_proxy_server, 0 /* header line */, + 0, 0); + + for (i = 0; i < vec_len (im->fibs); i++) + { + if (i < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + server_index = dpm->dhcp_server_index_by_rx_fib_index[i]; + else + server_index = 0; + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + if (server->valid) + vlib_cli_output (vm, "%U", format_dhcp_proxy_server, dpm, + server, i); + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = { + .path = "show dhcp proxy", + .short_help = "Display dhcp proxy server info", + .function = dhcp_proxy_show_command_fn, +}; + + +int dhcp_proxy_set_option82_vss( u32 vrf_id, + u32 oui, + u32 fib_id, + int is_del) +{ + dhcp_proxy_main_t *dm = &dhcp_proxy_main; + uword *p; + vss_info *a; + u32 old_oui=0, old_fib_id=0; + + p = hash_get (dm->opt82vss_index_by_vrf_id, vrf_id); + + if (p) + { + a = pool_elt_at_index (dm->opt82vss, p[0]); + if (!a) + return VNET_API_ERROR_NO_SUCH_FIB; + old_oui = a->vpn_id.oui; + old_fib_id = a->vpn_id.fib_id; + + if (is_del) + { + if (old_oui == oui && + old_fib_id == fib_id) + { + pool_put(dm->opt82vss, a); + hash_unset (dm->opt82vss_index_by_vrf_id, vrf_id); + return 0; + } + else + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + pool_put(dm->opt82vss, a); + hash_unset (dm->opt82vss_index_by_vrf_id, vrf_id); + } else if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + pool_get (dm->opt82vss, a); + memset (a, ~0, sizeof (a[0])); + a->vpn_id.oui = oui; + a->vpn_id.fib_id = fib_id; + hash_set (dm->opt82vss_index_by_vrf_id, vrf_id, a - dm->opt82vss); + + return 0; +} + +static clib_error_t * +dhcp_option_82_vss_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0, got_new_vpn_id=0; + u32 oui=0, fib_id=0, tbl_id=~0; + + + while (unformat_check_input(input) != 
UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "delete") || unformat(input, "del")) + is_del = 1; + else if (unformat (input, "oui %d", &oui)) + got_new_vpn_id = 1; + else if (unformat (input, "vpn-id %d", &fib_id)) + got_new_vpn_id = 1; + else if (unformat (input, "table %d", &tbl_id)) + got_new_vpn_id = 1; + else + break; + } + if (tbl_id == ~0) + return clib_error_return (0, "no table ID specified."); + + if (is_del || got_new_vpn_id) + { + int rv; + rv = dhcp_proxy_set_option82_vss(tbl_id, oui, fib_id, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "option 82 vss(oui:%d, vpn-id:%d) not found in table %d", + oui, fib_id, tbl_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "option 82 vss for table %d not found in in pool.", + tbl_id); + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcp_proxy_vss_command,static) = { + .path = "set dhcp option-82 vss", + .short_help = "set dhcp option-82 vss [del] table <table id> oui <oui> vpn-id <vpn-id>", + .function = dhcp_option_82_vss_fn, +}; + + +static clib_error_t * +dhcp_vss_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + vss_info *v; + u32 oui; + u32 fib_id; + u32 tbl_id; + uword index; + + vlib_cli_output (vm, "%=9s%=11s%=12s","Table", "OUI", "VPN-ID"); + hash_foreach (tbl_id, index, dm->opt82vss_index_by_vrf_id, + ({ + v = pool_elt_at_index (dm->opt82vss, index); + oui = v->vpn_id.oui; + fib_id = v->vpn_id.fib_id; + vlib_cli_output (vm, "%=9d 0x%08x%=12d", + tbl_id, oui, fib_id); + })); + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = { + .path = "show dhcp vss", + .short_help = "show dhcp VSS", + .function = dhcp_vss_show_command_fn, +}; + +static clib_error_t * +dhcp_option_82_address_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcp_proxy_main_t *dm = &dhcp_proxy_main; + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index0=0, sw_if_index; + ip4_address_t *ia0; + vnet_sw_interface_t *swif; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "%U", + unformat_vnet_sw_interface, dm->vnet_main, &sw_if_index0)) + { + swif = vnet_get_sw_interface (vnm, sw_if_index0); + sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? 
+ swif->unnumbered_sw_if_index : sw_if_index0; + ia0 = ip4_interface_first_address(&ip4_main, sw_if_index); + if (ia0) + { + vlib_cli_output (vm, "%=20s%=20s", "interface", + "source IP address"); + + vlib_cli_output (vm, "%=20U%=20U", + format_vnet_sw_if_index_name, + dm->vnet_main, sw_if_index0, + format_ip4_address, ia0); + } + else + vlib_cli_output (vm, "%=34s %=20U", + "No IPv4 address configured on", + format_vnet_sw_if_index_name, + dm->vnet_main, sw_if_index); + } + else + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = { + .path = "show dhcp option-82-address interface", + .short_help = "show dhcp option-82-address interface <interface>", + .function = dhcp_option_82_address_show_command_fn, +}; diff --git a/vnet/vnet/dhcpv6/packet.h b/vnet/vnet/dhcpv6/packet.h new file mode 100644 index 00000000000..8634b5d8e9b --- /dev/null +++ b/vnet/vnet/dhcpv6/packet.h @@ -0,0 +1,183 @@ +#ifndef included_vnet_dhcp_packet_h +#define included_vnet_dhcp_packet_h + +/* + * DHCP packet format + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/ip/ip6_packet.h> + +// #define DHCP_VRF_NAME_MAX_LEN L3VM_MAX_NAME_STR_LEN +// #define DHCPV6_MAX_VRF_NAME_LEN L3VM_MAX_NAME_STR_LEN +#define DHCP_MAX_RELAY_ADDR 16 +#define PROTO_UDP 17 +#define DHCPV6_CLIENT_PORT 546 +#define DHCPV6_SERVER_PORT 547 +#define HOP_COUNT_LIMIT 32 +#define DHCPV6_CISCO_ENT_NUM 9 + +/* + * DHCPv6 message types + */ +typedef enum dhcpv6_msg_type_{ + DHCPV6_MSG_SOLICIT = 1, + DHCPV6_MSG_ADVERTISE = 2, + DHCPV6_MSG_REQUEST = 3, + DHCPV6_MSG_CONFIRM = 4, + DHCPV6_MSG_RENEW = 5, + DHCPV6_MSG_REBIND = 6, + DHCPV6_MSG_REPLY = 7, + DHCPV6_MSG_RELEASE = 8, + DHCPV6_MSG_DECLINE = 9, + DHCPV6_MSG_RECONFIGURE = 10, + DHCPV6_MSG_INFORMATION_REQUEST = 11, + DHCPV6_MSG_RELAY_FORW = 12, + DHCPV6_MSG_RELAY_REPL = 13, +} dhcpv6_msg_type_t; + +/* + * DHCPv6 options types + */ +enum { + DHCPV6_OPTION_CLIENTID = 1, + DHCPV6_OPTION_SERVERID = 2, + DHCPV6_OPTION_IA_NA = 3, + DHCPV6_OPTION_IA_TA = 4, + DHCPV6_OPTION_IAADDR = 5, + DHCPV6_OPTION_ORO = 6, + DHCPV6_OPTION_PREFERENCE = 7, + DHCPV6_OPTION_ELAPSED_TIME = 8, + DHCPV6_OPTION_RELAY_MSG = 9, + DHCPV6_OPTION_AUTH = 11, + DHCPV6_OPTION_UNICAST = 12, + DHCPV6_OPTION_STATUS_CODE = 13, + DHCPV6_OPTION_RAPID_COMMIT = 14, + DHCPV6_OPTION_USER_CLASS = 15, + DHCPV6_OPTION_VENDOR_CLASS = 16, + DHCPV6_OPTION_VENDOR_OPTS = 17, + DHCPV6_OPTION_INTERFACE_ID = 18, // relay agent fills this + DHCPV6_OPTION_RECONF_MSG = 19, + DHCPV6_OPTION_RECONF_ACCEPT = 20, + DHCPV6_OPTION_REMOTEID = 37, // relay agent fills this + DHCPV6_OPTION_VSS = 68, // relay agent fills this + DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS = 79, + DHCPV6_OPTION_MAX +}; + +/* +* DHCPv6 status codes + */ +enum { + DHCPV6_STATUS_SUCCESS = 0, + DHCPV6_STATUS_UNSPEC_FAIL = 1, + DHCPV6_STATUS_NOADDRS_AVAIL = 2, + DHCPV6_STATUS_NO_BINDING = 3, + DHCPV6_STATUS_NOT_ONLINK = 4, + DHCPV6_STATUS_USE_MULTICAST = 5, +}; + +/* + * DHCPv6 
DUID types + */ +enum { + DHCPV6_DUID_LLT = 1, /* DUID Based on Link-layer Address Plus Time */ + DHCPV6_DUID_EN = 2, /* DUID Based on Enterprise Number */ + DHCPV6_DUID_LL = 3, /* DUID Based on Link-layer Address */ +}; + +//Structure for DHCPv6 payload from client +typedef struct dhcpv6_hdr_ { + union { + u8 msg_type; //DHCP msg type + u32 xid; // transaction id + }u; + u8 data[0]; +} dhcpv6_header_t; + + + +typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ { + dhcpv6_header_t *pkt; + u32 pkt_len; + u32 dhcpv6_len; //DHCPv6 payload load +// if_ordinal iod; + u32 if_index; + u32 ctx_id; + char ctx_name[32+1]; + u8 dhcp_msg_type; +}) dhcpv6_relay_ctx_t; + +//Structure for DHCPv6 RELAY-FORWARD and DHCPv6 RELAY-REPLY pkts +typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ { + u8 msg_type; + u8 hop_count; + ip6_address_t link_addr; + ip6_address_t peer_addr; + u8 data[0]; +}) dhcpv6_relay_hdr_t; + +typedef enum dhcp_stats_action_type_ { + DHCP_STATS_ACTION_FORWARDED=1, + DHCP_STATS_ACTION_RECEIVED, + DHCP_STATS_ACTION_DROPPED +} dhcp_stats_action_type_t; +//Generic counters for a packet +typedef struct dhcp_stats_counters_ { + u64 rx_pkts; //counter for received pkts + u64 tx_pkts; //counter for forwarded pkts + u64 drops; //counter for dropped pkts +} dhcp_stats_counters_t; + + +typedef enum dhcpv6_stats_drop_reason_ { + DHCPV6_RELAY_PKT_DROP_RELAYDISABLE = 1, + DHCPV6_RELAY_PKT_DROP_MAX_HOPS, + DHCPV6_RELAY_PKT_DROP_VALIDATION_FAIL, + DHCPV6_RELAY_PKT_DROP_UNKNOWN_OP_INTF, + DHCPV6_RELAY_PKT_DROP_BAD_CONTEXT, + DHCPV6_RELAY_PKT_DROP_OPT_INSERT_FAIL, + DHCPV6_RELAY_PKT_DROP_REPLY_FROM_CLIENT, +} dhcpv6_stats_drop_reason_t; + +typedef CLIB_PACKED (struct { + u16 option; + u16 length; + u8 data[0]; +}) dhcpv6_option_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u32 int_idx; +}) dhcpv6_int_id_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u8 data[8]; // data[0]:type, data[1..7]: VPN ID +}) dhcpv6_vss_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u32 ent_num; + u32 rmt_id; +}) dhcpv6_rmt_id_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u16 link_type; + u8 data[6]; // data[0]:data[5]: MAC address +}) dhcpv6_client_mac_t; + + +#endif /* included_vnet_dhcp_packet_h */ diff --git a/vnet/vnet/dhcpv6/proxy.h b/vnet/vnet/dhcpv6/proxy.h new file mode 100644 index 00000000000..bc10d7ce17e --- /dev/null +++ b/vnet/vnet/dhcpv6/proxy.h @@ -0,0 +1,88 @@ +/* + * proxy.h: dhcp proxy + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_dhcpv6_proxy_h +#define included_dhcpv6_proxy_h + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip4.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/pg/pg.h> +#include <vnet/ip/format.h> +#include <vnet/ip/udp.h> +#include <vnet/dhcpv6/packet.h> + +typedef enum { +#define dhcpv6_proxy_error(n,s) DHCPV6_PROXY_ERROR_##n, +#include <vnet/dhcpv6/proxy_error.def> +#undef dhcpv6_proxy_error + DHCPV6_PROXY_N_ERROR, +} dhcpv6_proxy_error_t; + +typedef struct { + u32 oui; + u32 fib_id; +} dhcpv6_vss_id; + +typedef union { + u8 as_u8[8]; + dhcpv6_vss_id vpn_id; +} dhcpv6_vss_info; + +typedef struct { + /* server to which we we relay. $$$ vector / pool someday */ + ip6_address_t dhcpv6_server; + + /* FIB index */ + u32 server_fib_index; + + /* source address to paste into relayed pkts */ + ip6_address_t dhcpv6_src_address; + + /* all DHCP servers address */ + ip6_address_t all_dhcpv6_server_address; + ip6_address_t all_dhcpv6_server_relay_agent_address; + + /* true if the relay should insert option 82 */ + int insert_option; + + /* to drop pkts in server-to-client direction */ + u32 error_drop_node_index; + + dhcpv6_vss_info *vss; + + /* hash lookup specific vrf_id -> VSS vector index*/ + uword *vss_index_by_vrf_id; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} dhcpv6_proxy_main_t; + +dhcpv6_proxy_main_t dhcpv6_proxy_main; + +int dhcpv6_proxy_set_server (ip6_address_t *addr, ip6_address_t *src_address, + u32 fib_id, int insert_vss, int is_del); + +int dhcpv6_proxy_set_vss(u32 tbl_id, + u32 oui, + u32 fib_id, + int is_del); + +#endif /* included_dhcpv6_proxy_h */ diff --git a/vnet/vnet/dhcpv6/proxy_error.def b/vnet/vnet/dhcpv6/proxy_error.def new file mode 100644 index 00000000000..ffa1d68dda4 --- /dev/null +++ b/vnet/vnet/dhcpv6/proxy_error.def @@ -0,0 +1,27 @@ +/* + * dhcp_proxy_error.def: dhcp proxy errors + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +dhcpv6_proxy_error (NONE, "no error") +dhcpv6_proxy_error (NO_SERVER, "no dhcpv6 server configured") +dhcpv6_proxy_error (RELAY_TO_SERVER, "DHCPV6 packets relayed to the server") +dhcpv6_proxy_error (RELAY_TO_CLIENT, "DHCPV6 packets relayed to clients") +dhcpv6_proxy_error (NO_INTERFACE_ADDRESS, "DHCPV6 no interface address") +dhcpv6_proxy_error (WRONG_MESSAGE_TYPE, "DHCPV6 wrong message type.") +dhcpv6_proxy_error (NO_SRC_ADDRESS, "DHCPV6 no srouce IPv6 address configured.") +dhcpv6_proxy_error (NO_CIRCUIT_ID_OPTION, "DHCPv6 reply packets without circuit ID option") +dhcpv6_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCPv6 packets not from DHCPv6 server or server FIB.") +dhcpv6_proxy_error (PKT_TOO_BIG, "DHCPv6 packets which are too big.") diff --git a/vnet/vnet/dhcpv6/proxy_node.c b/vnet/vnet/dhcpv6/proxy_node.c new file mode 100644 index 00000000000..e41fe9a1b3e --- /dev/null +++ b/vnet/vnet/dhcpv6/proxy_node.c @@ -0,0 +1,1046 @@ +/* + * proxy_node.c: dhcpv6 proxy node processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/dhcpv6/proxy.h> + +static char * dhcpv6_proxy_error_strings[] = { +#define dhcpv6_proxy_error(n,s) s, +#include "proxy_error.def" +#undef dhcpv6_proxy_error +}; + +#define foreach_dhcpv6_proxy_to_server_input_next \ + _ (DROP, "error-drop") \ + _ (LOOKUP, "ip6-lookup") \ + _ (SEND_TO_CLIENT, "dhcpv6-proxy-to-client") + + +typedef enum { +#define _(s,n) DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s, + foreach_dhcpv6_proxy_to_server_input_next +#undef _ + DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, +} dhcpv6_proxy_to_server_input_next_t; + +typedef struct { + /* 0 => to server, 1 => to client */ + int which; + u8 packet_data[64]; + u32 error; + u32 sw_if_index; + u32 original_sw_if_index; +} dhcpv6_proxy_trace_t; + +vlib_node_registration_t dhcpv6_proxy_to_server_node; +vlib_node_registration_t dhcpv6_proxy_to_client_node; + + +u8 * format_dhcpv6_proxy_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dhcpv6_proxy_trace_t * t = va_arg (*args, dhcpv6_proxy_trace_t *); + + if (t->which == 0) + s = format (s, "DHCPV6 proxy: sent to server %U", + format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); + else + s = format (s, "DHCPV6 proxy: sent to client from %U", + format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); + if (t->error != (u32)~0) + s = format (s, " error: %s\n", dhcpv6_proxy_error_strings[t->error]); + + s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", + t->original_sw_if_index, t->sw_if_index); + + return s; +} + +u8 * format_dhcpv6_proxy_header_with_length (u8 * s, va_list * args) +{ + dhcpv6_header_t * h = va_arg (*args, dhcpv6_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > 
max_header_bytes) + return format (s, "dhcpv6 header truncated"); + + s = format (s, "DHCPV6 Proxy"); + + return s; +} +/* get first interface address */ +static ip6_address_t * +ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip6_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + if ((a->as_u8[0] & 0xe0) == 0x20 || + (a->as_u8[0] & 0xfe) == 0xfc) { + result = a; + break; + } + })); + return result; +} + +/* get first interface address */ +static ip6_address_t * +ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip6_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + return result; +} + +static inline void copy_ip6_address (ip6_address_t *dst, ip6_address_t *src) +{ + + dst->as_u64[0] = src->as_u64[0]; + dst->as_u64[1] = src->as_u64[1]; +} + +static uword +dhcpv6_proxy_to_server_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + u32 pkts_to_server=0, pkts_to_client=0; + u32 pkts_no_interface_address=0, pkts_no_exceeding_max_hop=0; + u32 pkts_no_src_address=0; + u32 pkts_wrong_msg_type=0; + u32 pkts_too_big=0; + ip6_main_t * im = &ip6_main; + u32 fib_index=0, fib_id=0; + ip6_fib_t * fib; + ip6_address_t * src; + int bogus_length; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index = 0; + u32 original_sw_if_index = 0; + vnet_sw_interface_t *swif; + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0, *u1; + dhcpv6_header_t * h0; // client msg hdr + ip6_header_t * ip0, *ip1; + ip6_address_t _ia0, *ia0=&_ia0; + u32 next0; + u32 error0 = (u32) ~0; + dhcpv6_option_t *fwd_opt; + dhcpv6_relay_hdr_t *r1; + u16 len; + dhcpv6_int_id_t *id1; + dhcpv6_vss_t *vss1; + dhcpv6_client_mac_t *cmac; // client mac + ethernet_header_t * e_h0; + u8 client_src_mac[6]; + vlib_buffer_free_list_t *fl; + + uword *p_vss; + u32 oui1=0; + u32 fib_id1; + dhcpv6_vss_info *vss; + + fib_id1 = 0; + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = vlib_buffer_get_current (b0); + e_h0 = (ethernet_header_t *)b0->data; + memcpy(client_src_mac, e_h0->src_address, 6); + /* Send to DHCPV6 server via the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + dpm->server_fib_index; + + /* + * udp_local hands us the DHCPV6 header. 
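+ * That is, the buffer's current-data pointer sits on the DHCPv6 message
+ * itself; the UDP and IPv6 headers that carried it are still in the
+ * buffer immediately before h0, and are recovered below by stepping
+ * backwards from it.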
+ */ + u0 = (void *)h0 -(sizeof(*u0)); + ip0 = (void *)u0 -(sizeof(*ip0)); + + switch (h0->u.msg_type) { + case DHCPV6_MSG_SOLICIT: + case DHCPV6_MSG_REQUEST: + case DHCPV6_MSG_CONFIRM: + case DHCPV6_MSG_RENEW: + case DHCPV6_MSG_REBIND: + case DHCPV6_MSG_RELEASE: + case DHCPV6_MSG_DECLINE: + case DHCPV6_MSG_INFORMATION_REQUEST: + case DHCPV6_MSG_RELAY_FORW: + /* send to server */ + break; + case DHCPV6_MSG_RELAY_REPL: + /* send to client */ + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; + error0 = 0; + pkts_to_client++; + goto do_enqueue; + default: + /* drop the packet */ + pkts_wrong_msg_type++; + error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + goto do_trace; + + } + /* relay-option header pointer */ + vlib_buffer_advance(b0, -(sizeof(*fwd_opt))); + fwd_opt = vlib_buffer_get_current(b0); + /* relay message header pointer */ + vlib_buffer_advance(b0, -(sizeof(*r1))); + r1 = vlib_buffer_get_current(b0); + + vlib_buffer_advance(b0, -(sizeof(*u1))); + u1 = vlib_buffer_get_current(b0); + + vlib_buffer_advance(b0, -(sizeof(*ip1))); + ip1 = vlib_buffer_get_current(b0); + + /* fill in all that rubbish... */ + len = clib_net_to_host_u16(u0->length) - sizeof(udp_header_t); + copy_ip6_address(&r1->peer_addr, &ip0->src_address); + + r1->msg_type = DHCPV6_MSG_RELAY_FORW; + fwd_opt->length = clib_host_to_net_u16(len); + fwd_opt->option = clib_host_to_net_u16(DHCPV6_OPTION_RELAY_MSG); + + r1->hop_count++; + r1->hop_count = (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) ? 0 : r1->hop_count; + + if (PREDICT_FALSE(r1->hop_count >= HOP_COUNT_LIMIT)) + { + error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_exceeding_max_hop++; + goto do_trace; + } + + + /* If relay-fwd and src address is site or global unicast address */ + if (h0->u.msg_type == DHCPV6_MSG_RELAY_FORW && + ((ip0->src_address.as_u8[0] & 0xe0) == 0x20 || + (ip0->src_address.as_u8[0] & 0xfe) == 0xfc)) + { + /* Set link address to zero */ + r1->link_addr.as_u64[0] = 0; + r1->link_addr.as_u64[1] = 0; + goto link_address_set; + } + + /* if receiving interface is unnumbered, use receiving interface + * IP address as link address, otherwise use the loopback interface + * IP address as link address. 
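+ * Whichever interface is used, the address copied into the relay
+ * header is its first global or unique-local address: for example a
+ * global 2001:db8::1 satisfies the (as_u8[0] & 0xe0) == 0x20 test and a
+ * ULA fd00::1 the 0xfc test, while a link-local fe80::1 matches neither
+ * and is skipped. If no such address exists the packet is counted as
+ * NO_INTERFACE_ADDRESS and dropped.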
+ */ + original_sw_if_index = sw_if_index = + vnet_buffer(b0)->sw_if_index[VLIB_RX]; + swif = vnet_get_sw_interface (vnm, sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + ia0 = ip6_interface_first_global_or_site_address(&ip6_main, sw_if_index); + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_interface_address++; + goto do_trace; + } + + copy_ip6_address(&r1->link_addr, ia0); + + link_address_set: + fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + + if ((b0->current_length+sizeof(*id1)+sizeof(*vss1)+sizeof(*cmac)) + > fl->n_data_bytes) + { + error0 = DHCPV6_PROXY_ERROR_PKT_TOO_BIG; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_too_big++; + goto do_trace; + } + + id1 = (dhcpv6_int_id_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*id1)); + + fib_index = im->fib_index_by_sw_if_index + [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; + fib = vec_elt_at_index (im->fibs, fib_index); + fib_id = fib->table_id; + + p_vss = hash_get (dpm->vss_index_by_vrf_id, + fib_id); + if (p_vss) + { + vss = pool_elt_at_index (dpm->vss, p_vss[0]); + oui1 = vss->vpn_id.oui; + fib_id1 = vss->vpn_id.fib_id; + } + + id1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_INTERFACE_ID); + id1->opt.length = clib_host_to_net_u16(sizeof(original_sw_if_index)); + id1->int_idx = clib_host_to_net_u32(original_sw_if_index); + + u1->length =0; + if (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) + { + cmac = (dhcpv6_client_mac_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*cmac)); + cmac->opt.length =clib_host_to_net_u16(sizeof(*cmac) - + sizeof(cmac->opt)); + cmac->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS); + cmac->link_type = clib_host_to_net_u16(1); // ethernet + memcpy(cmac->data, client_src_mac, 6); + u1->length += sizeof(*cmac); + } + if (oui1 || fib_id1) { + vss1 = (dhcpv6_vss_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*vss1)); + vss1->opt.length =clib_host_to_net_u16(sizeof(*vss1) - + sizeof(vss1->opt)); + vss1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_VSS); + vss1->data[0] = 1; // type + vss1->data[1] = oui1>>16 & 0xff; + vss1->data[2] = oui1>>8 & 0xff; + vss1->data[3] = oui1 & 0xff; + vss1->data[4] = fib_id1>>24 & 0xff; + vss1->data[5] = fib_id1>>16 & 0xff; + vss1->data[6] = fib_id1>>8 & 0xff; + vss1->data[7] = fib_id1 & 0xff; + u1->length += sizeof(*vss1); + } + + pkts_to_server++; + u1->checksum = 0; + u1->src_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_client); + u1->dst_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_server); + + u1->length = + clib_host_to_net_u16( clib_net_to_host_u16(fwd_opt->length) + + sizeof(*r1) + sizeof(*fwd_opt) + + sizeof(*u1) + sizeof(*id1) + u1->length); + + memset(ip1, 0, sizeof(*ip1)); + ip1->ip_version_traffic_class_and_flow_label = 0x60; + ip1->payload_length = u1->length; + ip1->protocol = PROTO_UDP; + ip1->hop_limit = HOP_COUNT_LIMIT; + src = (dpm->dhcpv6_server.as_u64[0] || dpm->dhcpv6_server.as_u64[1]) ? + &dpm->dhcpv6_server : &dpm->all_dhcpv6_server_address; + copy_ip6_address(&ip1->dst_address, src); + + + ia0 = ip6_interface_first_global_or_site_address + (&ip6_main, vnet_buffer(b0)->sw_if_index[VLIB_RX]); + + src = (dpm->dhcpv6_src_address.as_u64[0] || dpm->dhcpv6_src_address.as_u64[1]) ? 
+ &dpm->dhcpv6_src_address : ia0; + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_src_address++; + goto do_trace; + } + + copy_ip6_address (&ip1->src_address, src); + + + u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, + &bogus_length); + ASSERT(bogus_length == 0); + + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 0; /* to server */ + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + if (DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP == next0) + copy_ip6_address((ip6_address_t *)&tr->packet_data[0], &dpm->dhcpv6_server); + } + + do_enqueue: + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_RELAY_TO_CLIENT, + pkts_to_client); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_RELAY_TO_SERVER, + pkts_to_server); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS, + pkts_no_interface_address); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE, + pkts_wrong_msg_type); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS, + pkts_no_src_address); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_PKT_TOO_BIG, + pkts_too_big); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node) = { + .function = dhcpv6_proxy_to_server_input, + .name = "dhcpv6-proxy-to-server", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = DHCPV6_PROXY_N_ERROR, + .error_strings = dhcpv6_proxy_error_strings, + + .n_next_nodes = DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, + foreach_dhcpv6_proxy_to_server_input_next +#undef _ + }, + + .format_buffer = format_dhcpv6_proxy_header_with_length, + .format_trace = format_dhcpv6_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcpv6_proxy_header, +#endif +}; + +static uword +dhcpv6_proxy_to_client_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + + u32 n_left_from, * from; + ethernet_main_t *em = ethernet_get_main (vm); + dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; + vnet_main_t * vnm = vnet_get_main(); + int bogus_length; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0, *u1=0; + dhcpv6_relay_hdr_t * h0; + ip6_header_t * ip1 = 0, *ip0; + ip6_address_t _ia0, * ia0 = &_ia0; + ip6_address_t client_address; + ethernet_interface_t *ei0; + ethernet_header_t *mac0; + vnet_hw_interface_t *hi0; + vlib_frame_t *f0; + u32 * to_next0; + u32 sw_if_index = ~0; + u32 original_sw_if_index = ~0; + vnet_sw_interface_t *si0; + u32 error0 = (u32)~0; + vnet_sw_interface_t *swif; + dhcpv6_option_t *r0, *o; + u16 len = 0; + u32 svr_fib_index, svr_fib_id; + ip6_fib_t * svr_fib; + ip6_main_t * im = &ip6_main; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + if (DHCPV6_MSG_RELAY_REPL != h0->msg_type) + { + error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; + + drop_packet: + vlib_node_increment_counter (vm, dhcpv6_proxy_to_client_node.index, + error0, 1); + + f0 = vlib_get_frame_to_node (vm, dpm->error_drop_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, dpm->error_drop_node_index, f0); + goto do_trace; + } + /* hop count seems not need to be checked */ + if (HOP_COUNT_LIMIT < h0->hop_count) + { + error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; + goto drop_packet; + } + u0 = (void *)h0 -(sizeof(*u0)); + ip0 = (void *)u0 -(sizeof(*ip0)); + + vlib_buffer_advance (b0, sizeof(*h0)); + o = r0 = vlib_buffer_get_current (b0); + + /* Parse through TLVs looking for option 9 (DHCPV6_OPTION_INTERFACE_ID). 
+ The interface-ID is the FIB number we need + to track down the client-facing interface */ + + while ((u8 *) o < (b0->data + b0->current_data + b0->current_length)) + { + if (DHCPV6_OPTION_INTERFACE_ID == clib_net_to_host_u16(o->option)) + { + if (clib_net_to_host_u16(o->length) == sizeof(sw_if_index)) + sw_if_index = clib_net_to_host_u32(((dhcpv6_int_id_t*)o)->int_idx); + break; + } + o = (dhcpv6_option_t *) (((uword) o) + clib_net_to_host_u16(o->length) + sizeof(*o)); + } + + if ((u32)~0 == sw_if_index) + { + error0 = DHCPV6_PROXY_ERROR_NO_CIRCUIT_ID_OPTION; + goto drop_packet; + } + + svr_fib_index = im->fib_index_by_sw_if_index + [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; + svr_fib = vec_elt_at_index (im->fibs, svr_fib_index); + svr_fib_id = svr_fib->table_id; + + if (svr_fib_id != dpm->server_fib_index || + ip0->src_address.as_u64[0] != dpm->dhcpv6_server.as_u64[0] || + ip0->src_address.as_u64[1] != dpm->dhcpv6_server.as_u64[1]) + { + //drop packet if not from server with configured address or FIB + error0 = DHCPV6_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; + goto drop_packet; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = original_sw_if_index + = sw_if_index; + + swif = vnet_get_sw_interface (vnm, original_sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + vlib_buffer_advance (b0, sizeof(*r0)); + /* + * udp_local hands us the DHCPV6 header, need udp hdr, + * ip hdr to relay to client + */ + vlib_buffer_advance (b0, -(sizeof(*u1))); + u1 = vlib_buffer_get_current (b0); + + vlib_buffer_advance (b0, -(sizeof(*ip1))); + ip1 = vlib_buffer_get_current (b0); + + copy_ip6_address(&client_address, &h0->peer_addr); + + ia0 = ip6_interface_first_address (&ip6_main, sw_if_index); + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; + goto drop_packet; + } + + len = clib_net_to_host_u16(r0->length); + memset(ip1, 0, sizeof(*ip1)); + copy_ip6_address(&ip1->dst_address, &client_address); + u1->checksum = 0; + u1->src_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_server); + u1->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_client); + u1->length = clib_host_to_net_u16 (len + sizeof(udp_header_t)); + + ip1->ip_version_traffic_class_and_flow_label = + ip0->ip_version_traffic_class_and_flow_label & + 0x00000fff; + ip1->payload_length = u1->length; + ip1->protocol = PROTO_UDP; + ip1->hop_limit = HOP_COUNT_LIMIT; + copy_ip6_address(&ip1->src_address, ia0); + + u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, + &bogus_length); + ASSERT(bogus_length == 0); + + vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); + si0 = vnet_get_sw_interface (vnm, original_sw_if_index); + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + vlib_buffer_advance (b0, -4 /* space for VLAN tag */); + + mac0 = vlib_buffer_get_current (b0); + + hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); + ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); + memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); + memset (&mac0->dst_address, 0xff, sizeof (mac0->dst_address)); + mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? 
+ clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x86dd); + + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + { + u32 * vlan_tag = (u32 *)(mac0+1); + u32 tmp; + tmp = (si0->sub.id << 16) | 0x0800; + *vlan_tag = clib_host_to_net_u32 (tmp); + } + + /* $$$ consider adding a dynamic next to the graph node, for performance */ + f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, hi0->output_node_index, f0); + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 1; /* to client */ + if (ia0) + copy_ip6_address((ip6_address_t*)tr->packet_data, ia0); + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + } + } + return from_frame->n_vectors; + +} + +VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node) = { + .function = dhcpv6_proxy_to_client_input, + .name = "dhcpv6-proxy-to-client", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = DHCPV6_PROXY_N_ERROR, + .error_strings = dhcpv6_proxy_error_strings, + .format_buffer = format_dhcpv6_proxy_header_with_length, + .format_trace = format_dhcpv6_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcpv6_proxy_header, +#endif +}; + +clib_error_t * dhcpv6_proxy_init (vlib_main_t * vm) +{ + dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; + vlib_node_t * error_drop_node; + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main(); + error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + dm->error_drop_node_index = error_drop_node->index; + + /* RFC says this is the dhcpv6 server address */ + dm->all_dhcpv6_server_address.as_u64[0] = clib_host_to_net_u64 (0xFF05000000000000); + dm->all_dhcpv6_server_address.as_u64[1] = clib_host_to_net_u64 (0x00010003); + + /* RFC says this is the server and agent address */ + dm->all_dhcpv6_server_relay_agent_address.as_u64[0] = clib_host_to_net_u64 (0xFF02000000000000); + dm->all_dhcpv6_server_relay_agent_address.as_u64[1] = clib_host_to_net_u64 (0x00010002); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_client, + dhcpv6_proxy_to_client_node.index, 0 /* is_ip4 */); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_server, + dhcpv6_proxy_to_server_node.index, 0 /* is_ip6 */); + return 0; +} + +VLIB_INIT_FUNCTION (dhcpv6_proxy_init); + +int dhcpv6_proxy_set_server (ip6_address_t *addr, ip6_address_t *src_address, + u32 fib_id, int insert_vss, int is_del) +{ + dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; + ip6_main_t * im = &ip6_main; + uword * p; + + + if (is_del) + { + dm->dhcpv6_server.as_u64[0] = 0; + dm->dhcpv6_server.as_u64[1] = 0; + dm->server_fib_index = 0; + dm->dhcpv6_src_address.as_u64[0] = 0; + dm->dhcpv6_src_address.as_u64[1] = 0; + dm->insert_option = 0; + return 0; + } + + if (addr->as_u64[0] == 0 && + addr->as_u64[1] == 0 ) + return VNET_API_ERROR_INVALID_DST_ADDRESS; + + if (src_address->as_u64[0] == 0 && + src_address->as_u64[1] == 0) + return VNET_API_ERROR_INVALID_SRC_ADDRESS; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (p == 0) + return VNET_API_ERROR_NO_SUCH_FIB; + + copy_ip6_address(&dm->dhcpv6_server, addr); + dm->server_fib_index = p[0]; + copy_ip6_address(&dm->dhcpv6_src_address, src_address); + dm->insert_option = insert_vss; + return 0; +} + +static clib_error_t * +dhcpv6_proxy_set_command_fn (vlib_main_t * vm, + unformat_input_t * input, + 
vlib_cli_command_t * cmd) +{ + ip6_address_t addr, src_addr; + int set_server = 0, set_src_address = 0, add_opt = 0; + u32 fib_id = 0; + int is_del = 0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server %U", + unformat_ip6_address, &addr)) + set_server = 1; + else if (unformat(input, "src-address %U", + unformat_ip6_address, &src_addr)) + set_src_address =1; + else if (unformat (input, "fib-id %d", &fib_id)) + ; + else if (unformat (input, "add-option") + || unformat (input, "insert-option")) + add_opt = 1; + else if (unformat (input, "delete") || + unformat (input, "del")) + is_del = 1; + else + break; + } + + if (is_del || (set_server && set_src_address)) + { + int rv; + + rv = dhcpv6_proxy_set_server (&addr, &src_addr, fib_id, + add_opt, is_del); + switch (rv) + { + case 0: + return 0; + + case -1: + return clib_error_return (0, "FIB id %d does not exist", fib_id); + + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = { + .path = "set dhcpv6 proxy", + .short_help = "set dhcpv6 proxy server <ipv6-addr> fib-id <fib-id> src-address <ipv6-addr>", + .function = dhcpv6_proxy_set_command_fn, +}; + +u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args) +{ + dhcpv6_proxy_main_t * dm = va_arg (*args, dhcpv6_proxy_main_t *); + ip6_fib_t * f; + u32 fib_id = (u32)~0; + + if (dm == 0) + { + s = format (s, "%=40s%=40s%=14s", "Server Address", "Source Address", + "Server FIB"); + return s; + } + + f = find_ip6_fib_by_table_index_or_id (&ip6_main, dm->server_fib_index, + IP6_ROUTE_FLAG_FIB_INDEX); + if (f) + fib_id = f->table_id; + + + s = format (s, "%=40U%=40U%=14u", + format_ip6_address, &dm->dhcpv6_server, + format_ip6_address, &dm->dhcpv6_src_address, + fib_id); + return s; +} + +static clib_error_t * +dhcpv6_proxy_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; + + vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, 0 /* header line */); + vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, dm); + return 0; +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = { + .path = "show dhcpv6 proxy", + .short_help = "Display dhcpv6 proxy info", + .function = dhcpv6_proxy_show_command_fn, +}; + +int dhcpv6_proxy_set_vss(u32 tbl_id, + u32 oui, + u32 fib_id, + int is_del) +{ + dhcpv6_proxy_main_t *dm = &dhcpv6_proxy_main; + u32 old_oui, old_fib_id; + uword *p; + dhcpv6_vss_info *v; + + p = hash_get (dm->vss_index_by_vrf_id, tbl_id); + + if (p) { + v = pool_elt_at_index (dm->vss, p[0]); + if (!v) + return VNET_API_ERROR_NO_SUCH_FIB; + + old_oui = v->vpn_id.oui; + old_fib_id = v->vpn_id.fib_id; + + if (is_del) + { + if (old_oui == oui && + old_fib_id == fib_id ) + { + pool_put(dm->vss, v); + hash_unset (dm->vss_index_by_vrf_id, tbl_id); + return 0; + } + else + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_put(dm->vss, v); + hash_unset (dm->vss_index_by_vrf_id, tbl_id); + } else if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + pool_get (dm->vss, v); + memset (v, ~0, sizeof (*v)); + v->vpn_id.fib_id = fib_id; + v->vpn_id.oui = oui; + hash_set (dm->vss_index_by_vrf_id, tbl_id, v - dm->vss); + + return 0; +} + + +static clib_error_t * +dhcpv6_vss_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0, got_new_vss=0; + u32 oui=0; + 
u32 fib_id=0, tbl_id=~0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "oui %d", &oui)) + got_new_vss = 1; + else if (unformat (input, "vpn-id %d", &fib_id)) + got_new_vss = 1; + else if (unformat (input, "table %d", &tbl_id)) + got_new_vss = 1; + else if (unformat(input, "delete") || unformat(input, "del")) + is_del = 1; + else + break; + } + + if (tbl_id ==~0) + return clib_error_return (0, "no table ID specified."); + + if (is_del || got_new_vss) + { + int rv; + + rv = dhcpv6_proxy_set_vss(tbl_id, oui, fib_id, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "vss info (oui:%d, vpn-id:%d) not found in table %d.", + oui, fib_id, tbl_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "vss for table %d not found in pool.", + tbl_id); + + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); + +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_vss_command, static) = { + .path = "set dhcpv6 vss", + .short_help = "set dhcpv6 vss table <table-id> oui <oui> vpn-idx <vpn-idx>", + .function = dhcpv6_vss_command_fn, +}; + +static clib_error_t * +dhcpv6_vss_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; + dhcpv6_vss_info *v; + u32 oui; + u32 fib_id; + u32 tbl_id; + uword index; + + vlib_cli_output (vm, "%=6s%=6s%=12s","Table", "OUI", "VPN ID"); + hash_foreach (tbl_id, index, dm->vss_index_by_vrf_id, + ({ + v = pool_elt_at_index (dm->vss, index); + oui = v->vpn_id.oui; + fib_id = v->vpn_id.fib_id; + vlib_cli_output (vm, "%=6d%=6d%=12d", + tbl_id, oui, fib_id); + })); + + return 0; +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_vss_show_command, static) = { + .path = "show dhcpv6 vss", + .short_help = "show dhcpv6 VSS", + .function = dhcpv6_vss_show_command_fn, +}; + +static clib_error_t * +dhcpv6_link_address_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcpv6_proxy_main_t *dm = &dhcpv6_proxy_main; + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index0=0, sw_if_index; + ip6_address_t *ia0; + vnet_sw_interface_t *swif; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "%U", + unformat_vnet_sw_interface, dm->vnet_main, &sw_if_index0)) + { + swif = vnet_get_sw_interface (vnm, sw_if_index0); + sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? 
+ swif->unnumbered_sw_if_index : sw_if_index0; + ia0 = ip6_interface_first_address(&ip6_main, sw_if_index); + if (ia0) + { + vlib_cli_output (vm, "%=20s%=48s", "interface", "link-address"); + + vlib_cli_output (vm, "%=20U%=48U", + format_vnet_sw_if_index_name, dm->vnet_main, sw_if_index0, + format_ip6_address, ia0); + } else + vlib_cli_output (vm, "%=34s%=20U", "No IPv6 address configured on", + format_vnet_sw_if_index_name, dm->vnet_main, sw_if_index); + } else + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_address_show_command, static) = { + .path = "show dhcpv6 link-address interface", + .short_help = "show dhcpv6 link-address interface <interface>", + .function = dhcpv6_link_address_show_command_fn, +}; diff --git a/vnet/vnet/dpdk_replication.h b/vnet/vnet/dpdk_replication.h new file mode 100644 index 00000000000..b25558f9b89 --- /dev/null +++ b/vnet/vnet/dpdk_replication.h @@ -0,0 +1,107 @@ +#ifndef __included_dpdk_replication_h__ +#define __included_dpdk_replication_h__ +#include <vnet/devices/dpdk/dpdk.h> + +/* + * vlib_dpdk_clone_buffer - clone a buffer + * for port mirroring, lawful intercept, etc. + * rte_pktmbuf_clone (...) requires that the forwarding path + * not touch any of the cloned data. The hope is that we'll + * figure out how to relax that restriction. + * + * For the moment, copy packet data. + */ + +static inline vlib_buffer_t * +vlib_dpdk_clone_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + u32 new_buffers_needed = 1; + unsigned socket_id = rte_socket_id(); + struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id]; + struct rte_mbuf *rte_mbufs[5]; + vlib_buffer_free_list_t * fl; + vlib_buffer_t * rv; + u8 * copy_src, * copy_dst; + vlib_buffer_t *src_buf, *dst_buf; + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + if (PREDICT_FALSE(b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + vlib_buffer_t *tmp = b; + int i; + + while (tmp->flags & VLIB_BUFFER_NEXT_PRESENT) + { + new_buffers_needed ++; + tmp = vlib_get_buffer (vm, tmp->next_buffer); + } + + /* Should never happen... 
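+ a buffer chain of more than ARRAY_LEN(rte_mbufs) segments cannot be
+ cloned here, so warn and give up rather than overrun the local array.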
*/ + if (PREDICT_FALSE(new_buffers_needed > ARRAY_LEN(rte_mbufs))) + { + clib_warning ("need %d buffers", new_buffers_needed); + return 0; + } + + if (rte_mempool_get_bulk (rmp, (void **)rte_mbufs, + new_buffers_needed) < 0) + return 0; + + src_buf = b; + rv = dst_buf = (vlib_buffer_t *)(rte_mbufs[0] + 1); + vlib_buffer_init_for_free_list (dst_buf, fl); + copy_src = b->data + src_buf->current_data; + copy_dst = dst_buf->data + src_buf->current_data; + + for (i = 0; i < new_buffers_needed; i++) + { + memcpy (copy_src, copy_dst, src_buf->current_length); + dst_buf->current_data = src_buf->current_data; + dst_buf->current_length = src_buf->current_length; + dst_buf->flags = src_buf->flags; + + if (i == 0) + { + dst_buf->total_length_not_including_first_buffer = + src_buf->total_length_not_including_first_buffer; + vnet_buffer(dst_buf)->sw_if_index[VLIB_RX] = + vnet_buffer(src_buf)->sw_if_index[VLIB_RX]; + vnet_buffer(dst_buf)->sw_if_index[VLIB_TX] = + vnet_buffer(src_buf)->sw_if_index[VLIB_TX]; + vnet_buffer(dst_buf)->l2 = vnet_buffer(b)->l2; + } + + if (i < new_buffers_needed - 1) + { + src_buf = vlib_get_buffer (vm, src_buf->next_buffer); + dst_buf = (vlib_buffer_t *)(rte_mbufs[i+1] + 1); + vlib_buffer_init_for_free_list (dst_buf, fl); + copy_src = src_buf->data; + copy_dst = dst_buf->data; + } + } + return rv; + } + + if (rte_mempool_get_bulk (rmp, (void **)rte_mbufs, 1) < 0) + return 0; + + rv = (vlib_buffer_t *)(rte_mbufs[0] + 1); + vlib_buffer_init_for_free_list (rv, fl); + + memcpy(rv->data + b->current_data, b->data + b->current_data, + b->current_length); + rv->current_data = b->current_data; + rv->current_length = b->current_length; + vnet_buffer(rv)->sw_if_index[VLIB_RX] = + vnet_buffer(b)->sw_if_index[VLIB_RX]; + vnet_buffer(rv)->sw_if_index[VLIB_TX] = + vnet_buffer(b)->sw_if_index[VLIB_TX]; + vnet_buffer(rv)->l2 = vnet_buffer(b)->l2; + + return (rv); +} + + +#endif /* __included_dpdk_replication_h__ */ diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c new file mode 100644 index 00000000000..446c76a1e41 --- /dev/null +++ b/vnet/vnet/ethernet/arp.c @@ -0,0 +1,1853 @@ +/* + * ethernet/arp.c: IP v4 ARP node + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/arp_packet.h> +#include <vnet/l2/l2_input.h> +#include <vppinfra/mhash.h> + +typedef struct { + u32 sw_if_index; + u32 fib_index; + ip4_address_t ip4_address; +} ethernet_arp_ip4_key_t; + +typedef struct { + ethernet_arp_ip4_key_t key; + u8 ethernet_address[6]; + + u16 flags; +#define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0) + + u64 cpu_time_last_updated; +} ethernet_arp_ip4_entry_t; + +typedef struct { + u32 lo_addr; + u32 hi_addr; + u32 fib_index; +} ethernet_proxy_arp_t; + +typedef struct { + u32 next_index; + uword node_index; + uword type_opaque; + uword data; + /* Used for arp event notification only */ + void * data_callback; + u32 pid; +} pending_resolution_t; + +typedef struct { + /* Hash tables mapping name to opcode. */ + uword * opcode_by_name; + + /* lite beer "glean" adjacency handling */ + uword * pending_resolutions_by_address; + pending_resolution_t * pending_resolutions; + + /* Mac address change notification */ + uword * mac_changes_by_address; + pending_resolution_t * mac_changes; + + u32 * arp_input_next_index_by_hw_if_index; + + ethernet_arp_ip4_entry_t * ip4_entry_pool; + + mhash_t ip4_entry_by_key; + + /* ARP attack mitigation */ + u32 arp_delete_rotor; + u32 limit_arp_cache_size; + + /* Proxy arp vector */ + ethernet_proxy_arp_t * proxy_arps; +} ethernet_arp_main_t; + +static ethernet_arp_main_t ethernet_arp_main; + +static u8 * format_ethernet_arp_hardware_type (u8 * s, va_list * va) +{ + ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t); + char * t = 0; + switch (h) + { +#define _(n,f) case n: t = #f; break; + foreach_ethernet_arp_hardware_type; +#undef _ + + default: + return format (s, "unknown 0x%x", h); + } + + return format (s, "%s", t); +} + +static u8 * format_ethernet_arp_opcode (u8 * s, va_list * va) +{ + ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t); + char * t = 0; + switch (o) + { +#define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break; + foreach_ethernet_arp_opcode; +#undef _ + + default: + return format (s, "unknown 0x%x", o); + } + + return format (s, "%s", t); +} + +static uword +unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input, + va_list * args) +{ + int * result = va_arg (*args, int *); + ethernet_arp_main_t * am = ðernet_arp_main; + int x, i; + + /* Numeric opcode. */ + if (unformat (input, "0x%x", &x) + || unformat (input, "%d", &x)) + { + if (x >= (1 << 16)) + return 0; + *result = x; + return 1; + } + + /* Named type. */ + if (unformat_user (input, unformat_vlib_number_by_name, + am->opcode_by_name, &i)) + { + *result = i; + return 1; + } + + return 0; +} + +static uword +unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input, + va_list * args) +{ + int * result = va_arg (*args, int *); + if (! 
unformat_user (input, unformat_ethernet_arp_opcode_host_byte_order, result)) + return 0; + + *result = clib_host_to_net_u16 ((u16) *result); + return 1; +} + +static u8 * format_ethernet_arp_header (u8 * s, va_list * va) +{ + ethernet_arp_header_t * a = va_arg (*va, ethernet_arp_header_t *); + u32 max_header_bytes = va_arg (*va, u32); + uword indent; + u16 l2_type, l3_type; + + if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes) + return format (s, "ARP header truncated"); + + l2_type = clib_net_to_host_u16 (a->l2_type); + l3_type = clib_net_to_host_u16 (a->l3_type); + + indent = format_get_indent (s); + + s = format (s, "%U, type %U/%U, address size %d/%d", + format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode), + format_ethernet_arp_hardware_type, l2_type, + format_ethernet_type, l3_type, + a->n_l2_address_bytes, a->n_l3_address_bytes); + + if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet + && l3_type == ETHERNET_TYPE_IP4) + { + s = format (s, "\n%U%U/%U -> %U/%U", + format_white_space, indent, + format_ethernet_address, a->ip4_over_ethernet[0].ethernet, + format_ip4_address, &a->ip4_over_ethernet[0].ip4, + format_ethernet_address, a->ip4_over_ethernet[1].ethernet, + format_ip4_address, &a->ip4_over_ethernet[1].ip4); + } + else + { + uword n2 = a->n_l2_address_bytes; + uword n3 = a->n_l3_address_bytes; + s = format (s, "\n%U%U/%U -> %U/%U", + format_white_space, indent, + format_hex_bytes, a->data + 0*n2 + 0*n3, n2, + format_hex_bytes, a->data + 1*n2 + 0*n3, n3, + format_hex_bytes, a->data + 1*n2 + 1*n3, n2, + format_hex_bytes, a->data + 2*n2 + 1*n3, n3); + } + + return s; +} + +static u8 * format_ethernet_arp_ip4_entry (u8 * s, va_list * va) +{ + vnet_main_t * vnm = va_arg (*va, vnet_main_t *); + ethernet_arp_ip4_entry_t * e = va_arg (*va, ethernet_arp_ip4_entry_t *); + vnet_sw_interface_t * si; + ip4_fib_t * fib; + + if (! e) + return format (s, "%=12s%=6s%=16s%=4s%=20s%=24s", "Time", "FIB", "IP4", + "Static", "Ethernet", "Interface"); + + fib = find_ip4_fib_by_table_index_or_id (&ip4_main, e->key.fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + si = vnet_get_sw_interface (vnm, e->key.sw_if_index); + s = format (s, "%=12U%=6u%=16U%=4s%=20U%=25U", + format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated, + fib->table_id, + format_ip4_address, &e->key.ip4_address, + (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) ? "S" : "", + format_ethernet_address, e->ethernet_address, + format_vnet_sw_interface_name, vnm, si); + + return s; +} + +typedef struct { + u8 packet_data[64]; +} ethernet_arp_input_trace_t; + +static u8 * format_ethernet_arp_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ethernet_arp_input_trace_t * t = va_arg (*va, ethernet_arp_input_trace_t *); + + s = format (s, "%U", + format_ethernet_arp_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + +clib_error_t * +ethernet_arp_sw_interface_up_down (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + ethernet_arp_ip4_entry_t * e; + + if (! 
(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + u32 i, * to_delete = 0; + + pool_foreach (e, am->ip4_entry_pool, ({ + if (e->key.sw_if_index == sw_if_index) + vec_add1 (to_delete, e - am->ip4_entry_pool); + })); + + for (i = 0; i < vec_len (to_delete); i++) + { + ethernet_arp_ip4_over_ethernet_address_t delme; + e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]); + + memcpy (&delme.ethernet, e->ethernet_address, 6); + delme.ip4.as_u32 = e->key.ip4_address.as_u32; + + vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index, + e->key.fib_index, &delme); + } + + vec_free (to_delete); + } + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down); + +static int +vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, + u32 sw_if_index, + u32 fib_index, + void * a_arg, + int is_static); + +static int +vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm, + u32 sw_if_index, + u32 fib_index, + void * a_arg); + +typedef struct { + u32 sw_if_index; + u32 fib_index; + ethernet_arp_ip4_over_ethernet_address_t a; + int is_static; + int is_remove; /* set is_remove=1 to clear arp entry */ +} vnet_arp_set_ip4_over_ethernet_rpc_args_t; + +static void set_ip4_over_ethernet_rpc_callback +( vnet_arp_set_ip4_over_ethernet_rpc_args_t * a) +{ + vnet_main_t * vm = vnet_get_main(); + ASSERT(os_get_cpu_number() == 0); + + if (a->is_remove) + vnet_arp_unset_ip4_over_ethernet_internal(vm, + a->sw_if_index, + a->fib_index, + &(a->a)); + else + vnet_arp_set_ip4_over_ethernet_internal (vm, + a->sw_if_index, + a->fib_index, + &(a->a), + a->is_static); +} + +int +vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, + u32 fib_index, + void * a_arg, + int is_static) +{ + ethernet_arp_ip4_over_ethernet_address_t * a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.fib_index = fib_index; + args.is_static = is_static; + args.is_remove = 0; + memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) &args, sizeof (args)); + return 0; +} + +int +vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, + u32 sw_if_index, + u32 fib_index, + void * a_arg, + int is_static) +{ + ethernet_arp_ip4_key_t k; + ethernet_arp_ip4_entry_t * e = 0; + ethernet_arp_main_t * am = ðernet_arp_main; + ethernet_arp_ip4_over_ethernet_address_t * a = a_arg; + vlib_main_t * vm = vlib_get_main(); + ip4_main_t * im = &ip4_main; + int make_new_arp_cache_entry=1; + uword * p; + ip4_add_del_route_args_t args; + ip_adjacency_t adj; + pending_resolution_t * pr, * mc; + + u32 next_index; + + fib_index = (fib_index != (u32)~0) + ? fib_index : im->fib_index_by_sw_if_index[sw_if_index]; + + k.sw_if_index = sw_if_index; + k.ip4_address = a->ip4; + k.fib_index = fib_index; + + p = mhash_get (&am->ip4_entry_by_key, &k); + if (p) + { + e = pool_elt_at_index (am->ip4_entry_pool, p[0]); + + /* Refuse to over-write static arp. */ + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + return -2; + make_new_arp_cache_entry = 0; + } + + /* Note: always install the route. 
It might have been deleted */ + memset(&adj, 0, sizeof(adj)); + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_IP4, + sw_if_index, + ip4_rewrite_node.index, + a->ethernet, /* destination address */ + &adj.rewrite_header, + sizeof (adj.rewrite_data)); + + args.table_index_or_table_id = fib_index; + args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | IP4_ROUTE_FLAG_NEIGHBOR; + args.dst_address = a->ip4; + args.dst_address_length = 32; + args.adj_index = ~0; + args.add_adj = &adj; + args.n_add_adj = 1; + + ip4_add_del_route (im, &args); + if (make_new_arp_cache_entry) + { + pool_get (am->ip4_entry_pool, e); + mhash_set (&am->ip4_entry_by_key, &k, + e - am->ip4_entry_pool, + /* old value */ 0); + e->key = k; + } + + /* Update time stamp and ethernet address. */ + memcpy (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address)); + e->cpu_time_last_updated = clib_cpu_time_now (); + if (is_static) + e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC; + + /* Customer(s) waiting for this address to be resolved? */ + p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32); + if (p) + { + next_index = p[0]; + + while (next_index != (u32)~0) + { + pr = pool_elt_at_index (am->pending_resolutions, next_index); + vlib_process_signal_event (vm, pr->node_index, + pr->type_opaque, + pr->data); + next_index = pr->next_index; + pool_put (am->pending_resolutions, pr); + } + + hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32); + } + + /* Customer(s) requesting ARP event for this address? */ + p = hash_get (am->mac_changes_by_address, a->ip4.as_u32); + if (p) + { + next_index = p[0]; + + while (next_index != (u32)~0) + { + int (*fp)(u32, u8 *, u32, u32); + int rv = 1; + mc = pool_elt_at_index (am->mac_changes, next_index); + fp = mc->data_callback; + + /* Call the user's data callback, return 1 to suppress dup events */ + if (fp) + rv = (*fp)(mc->data, a->ethernet, sw_if_index, 0); + + /* + * Signal the resolver process, as long as the user + * says they want to be notified + */ + if (rv == 0) + vlib_process_signal_event (vm, mc->node_index, + mc->type_opaque, + mc->data); + next_index = mc->next_index; + } + } + + return 0; +} + +void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, + void * address_arg, + uword node_index, + uword type_opaque, + uword data) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + ip4_address_t * address = address_arg; + uword * p; + pending_resolution_t * pr; + + pool_get (am->pending_resolutions, pr); + + pr->next_index = ~0; + pr->node_index = node_index; + pr->type_opaque = type_opaque; + pr->data = data; + pr->data_callback = 0; + + p = hash_get (am->pending_resolutions_by_address, address->as_u32); + if (p) + { + /* Insert new resolution at the head of the list */ + pr->next_index = p[0]; + hash_unset (am->pending_resolutions_by_address, address->as_u32); + } + + hash_set (am->pending_resolutions_by_address, address->as_u32, + pr - am->pending_resolutions); +} + +int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, + void * data_callback, + u32 pid, + void * address_arg, + uword node_index, + uword type_opaque, + uword data, int is_add) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + ip4_address_t * address = address_arg; + uword * p; + pending_resolution_t * mc; + void (*fp)(u32, u8 *) = data_callback; + + if (is_add) + { + pool_get (am->mac_changes, mc); + + mc->next_index = ~0; + mc->node_index = node_index; + mc->type_opaque = type_opaque; + mc->data = data; 
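+ /* Registrations for the same address form a singly-linked list threaded
+ through next_index and headed by mac_changes_by_address, so several
+ clients can watch one IP. */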
+ mc->data_callback = data_callback; + mc->pid = pid; + + p = hash_get (am->mac_changes_by_address, address->as_u32); + if (p) + { + /* Insert new resolution at the head of the list */ + mc->next_index = p[0]; + hash_unset (am->mac_changes_by_address, address->as_u32); + } + + hash_set (am->mac_changes_by_address, address->as_u32, + mc - am->mac_changes); + return 0; + } + else + { + u32 index; + pending_resolution_t * mc_last = 0; + + p = hash_get (am->mac_changes_by_address, address->as_u32); + if (p == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + index = p[0]; + + while (index != (u32)~0) + { + mc = pool_elt_at_index (am->mac_changes, index); + if (mc->node_index == node_index && + mc->type_opaque == type_opaque && + mc->pid == pid) + { + /* Clients may need to clean up pool entries, too */ + if (fp) + (*fp)(mc->data, 0 /* no new mac addrs */); + if (index == p[0]) + { + hash_unset (am->mac_changes_by_address, address->as_u32); + if (mc->next_index != ~0) + hash_set (am->mac_changes_by_address, address->as_u32, + mc->next_index); + pool_put (am->mac_changes, mc); + return 0; + } + else + { + ASSERT(mc_last); + mc_last->next_index = mc->next_index; + pool_put (am->mac_changes, mc); + return 0; + } + } + mc_last = mc; + index = mc->next_index; + } + + return VNET_API_ERROR_NO_SUCH_ENTRY; + } +} + +/* Either we drop the packet or we send a reply to the sender. */ +typedef enum { + ARP_INPUT_NEXT_DROP, + ARP_INPUT_N_NEXT, +} arp_input_next_t; + +#define foreach_ethernet_arp_error \ + _ (replies_sent, "ARP replies sent") \ + _ (l2_type_not_ethernet, "L2 type not ethernet") \ + _ (l3_type_not_ip4, "L3 type not IP4") \ + _ (l3_src_address_not_local, "IP4 source address not local to subnet") \ + _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \ + _ (l3_src_address_is_local, "IP4 source address matches local interface") \ + _ (l3_src_address_learned, "ARP request IP4 source address learned") \ + _ (replies_received, "ARP replies received") \ + _ (opcode_not_request, "ARP opcode not request") \ + _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \ + _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \ + _ (missing_interface_address, "ARP missing interface address") \ + _ (gratuitous_arp, "ARP probe or announcement dropped") \ + +typedef enum { +#define _(sym,string) ETHERNET_ARP_ERROR_##sym, + foreach_ethernet_arp_error +#undef _ + ETHERNET_ARP_N_ERROR, +} ethernet_arp_input_error_t; + +/* get first interface address */ +ip4_address_t * +ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, + ip_interface_address_t ** result_ia) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip4_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip4_address_t * a = ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + if (result_ia) + *result_ia = result ? 
ia : 0; + return result; +} + +static void unset_random_arp_entry (void) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + ethernet_arp_ip4_entry_t * e; + vnet_main_t * vnm = vnet_get_main(); + ethernet_arp_ip4_over_ethernet_address_t delme; + u32 index; + + index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor); + am->arp_delete_rotor = index; + + /* Try again from elt 0, could happen if an intfc goes down */ + if (index == ~0) + { + index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor); + am->arp_delete_rotor = index; + } + + /* Nothing left in the pool */ + if (index == ~0) + return; + + e = pool_elt_at_index (am->ip4_entry_pool, index); + + memcpy (&delme.ethernet, e->ethernet_address, 6); + delme.ip4.as_u32 = e->key.ip4_address.as_u32; + + vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index, + e->key.fib_index, &delme); +} + +static u32 arp_unnumbered (vlib_buffer_t * p0, + u32 pi0, + ethernet_header_t * eth0, + ip_interface_address_t * ifa0) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + vlib_main_t * vm = vlib_get_main(); + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * vim = &vnm->interface_main; + vnet_sw_interface_t * si; + vnet_hw_interface_t * hi; + u32 unnum_src_sw_if_index; + u32 * broadcast_swifs = 0; + u32 * buffers = 0; + u32 n_alloc = 0; + vlib_buffer_t * b0; + int i; + u8 dst_mac_address[6]; + i16 header_size; + ethernet_arp_header_t * arp0; + + /* Save the dst mac address */ + memcpy(dst_mac_address, eth0->dst_address, sizeof (dst_mac_address)); + + /* Figure out which sw_if_index supplied the address */ + unnum_src_sw_if_index = ifa0->sw_if_index; + + /* Track down all users of the unnumbered source */ + pool_foreach (si, vim->sw_interfaces, + ({ + if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED && + (si->unnumbered_sw_if_index == unnum_src_sw_if_index)) + { + vec_add1 (broadcast_swifs, si->sw_if_index); + } + })); + + + ASSERT (vec_len(broadcast_swifs)); + + /* Allocate buffering if we need it */ + if (vec_len(broadcast_swifs) > 1) + { + vec_validate (buffers, vec_len(broadcast_swifs)-2); + n_alloc = vlib_buffer_alloc (vm, buffers, vec_len(buffers)); + _vec_len (buffers) = n_alloc; + for (i = 0; i < n_alloc; i++) + { + b0 = vlib_get_buffer (vm, buffers[i]); + + /* xerox (partially built) ARP pkt */ + memcpy (b0->data, p0->data, p0->current_length + p0->current_data); + b0->current_data = p0->current_data; + b0->current_length = p0->current_length; + vnet_buffer(b0)->sw_if_index[VLIB_RX] = + vnet_buffer(p0)->sw_if_index[VLIB_RX]; + } + } + + vec_insert (buffers, 1, 0); + buffers[0] = pi0; + + for (i = 0; i < vec_len(buffers); i++) + { + b0 = vlib_get_buffer(vm, buffers[i]); + arp0 = vlib_buffer_get_current (b0); + + hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]); + si = vnet_get_sw_interface (vnm, broadcast_swifs[i]); + + /* For decoration, most likely */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = hi->sw_if_index; + + /* Fix ARP pkt src address */ + memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6); + + /* Build L2 encaps for this swif */ + header_size = sizeof (ethernet_header_t); + if (si->sub.eth.flags.one_tag) + header_size += 4; + else if (si->sub.eth.flags.two_tags) + header_size += 8; + + vlib_buffer_advance (b0, -header_size); + eth0 = vlib_buffer_get_current (b0); + + if (si->sub.eth.flags.one_tag) { + ethernet_vlan_header_t * outer = (void *) (eth0 + 1); + + eth0->type = si->sub.eth.flags.dot1ad ? 
+ clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) : + clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + outer->priority_cfi_and_id = + clib_host_to_net_u16 (si->sub.eth.outer_vlan_id); + outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP); + + } else if (si->sub.eth.flags.two_tags) { + ethernet_vlan_header_t * outer = (void *) (eth0 + 1); + ethernet_vlan_header_t * inner = (void *) (outer + 1); + + eth0->type = si->sub.eth.flags.dot1ad ? + clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) : + clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + outer->priority_cfi_and_id = + clib_host_to_net_u16 (si->sub.eth.outer_vlan_id); + outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + inner->priority_cfi_and_id = + clib_host_to_net_u16 (si->sub.eth.inner_vlan_id); + inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP); + + } else { + eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP); + } + + /* Restore the original dst address, set src address */ + memcpy (eth0->dst_address, dst_mac_address, sizeof (eth0->dst_address)); + memcpy (eth0->src_address, hi->hw_address, sizeof (eth0->src_address)); + + /* Transmit replicas */ + if (i > 0) + { + vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 * to_next = vlib_frame_vector_args (f); + to_next[0] = buffers[i]; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } + } + + hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[0]); + + vec_free (broadcast_swifs); + vec_free (buffers); + + /* The regular path outputs the original pkt.. */ + return vec_elt (am->arp_input_next_index_by_hw_if_index, hi->hw_if_index); +} + +static uword +arp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im4 = &ip4_main; + u32 n_left_from, next_index, * from, * to_next; + u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (ethernet_arp_input_trace_t)); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + vnet_hw_interface_t * hw_if0; + ethernet_arp_header_t * arp0; + ethernet_header_t * eth0; + ip_interface_address_t * ifa0; + ip_adjacency_t * adj0; + ip4_address_t * if_addr0; + ip4_address_t proxy_src; + u32 pi0, error0, next0, sw_if_index0; + u8 is_request0, src_is_local0, dst_is_local0, is_unnum0; + ethernet_proxy_arp_t * pa; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + arp0 = vlib_buffer_get_current (p0); + + is_request0 = arp0->opcode + == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request); + + error0 = ETHERNET_ARP_ERROR_replies_sent; + + error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) + ? ETHERNET_ARP_ERROR_l2_type_not_ethernet + : error0); + error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4) + ? ETHERNET_ARP_ERROR_l3_type_not_ip4 + : error0); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + if (error0) + goto drop1; + + /* Check that IP address is local and matches incoming interface. 
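+ Specifically, the ARP target address (ip4_over_ethernet[1].ip4) must be
+ covered by a subnet on the receiving interface; if it is not, the
+ packet falls through to the gratuitous/proxy-ARP handling at drop1.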
*/ + if_addr0 = ip4_interface_address_matching_destination (im4, + &arp0->ip4_over_ethernet[1].ip4, + sw_if_index0, + &ifa0); + if (! if_addr0) + { + error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local; + goto drop1; + } + + /* Honor unnumbered interface, if any */ + is_unnum0 = sw_if_index0 != ifa0->sw_if_index; + + /* Source must also be local to subnet of matching interface address. */ + if (! ip4_destination_matches_interface (im4, &arp0->ip4_over_ethernet[0].ip4, ifa0)) + { + error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local; + goto drop1; + } + + /* Reject requests/replies with our local interface address. */ + src_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[0].ip4.as_u32; + if (src_is_local0) + { + error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local; + goto drop1; + } + + dst_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[1].ip4.as_u32; + + /* Fill in ethernet header. */ + eth0 = ethernet_buffer_get_header (p0); + + /* Trash ARP packets whose ARP-level source addresses do not + match their L2-frame-level source addresses */ + if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet, + sizeof (eth0->src_address))) + { + error0 = ETHERNET_ARP_ERROR_l2_address_mismatch; + goto drop2; + } + + /* Learn or update sender's mapping only for requests or unicasts + that don't match local interface address. */ + if (ethernet_address_cast (eth0->dst_address) == ETHERNET_ADDRESS_UNICAST + || is_request0) + { + if (am->limit_arp_cache_size && + pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size) + unset_random_arp_entry(); + + vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0, + (u32)~0 /* default fib */, + &arp0->ip4_over_ethernet[0], + 0 /* is_static */); + error0 = ETHERNET_ARP_ERROR_l3_src_address_learned; + } + + /* Only send a reply for requests sent which match a local interface. */ + if (! (is_request0 && dst_is_local0)) + { + error0 = (arp0->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) + ? ETHERNET_ARP_ERROR_replies_received : error0); + goto drop1; + } + + /* Send a reply. */ + send_reply: + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + /* Can happen in a multi-core env. */ + if (PREDICT_FALSE(hw_if0->hw_if_index >= vec_len (am->arp_input_next_index_by_hw_if_index))) + { + error0 = ETHERNET_ARP_ERROR_missing_interface_address; + goto drop2; + } + + next0 = vec_elt (am->arp_input_next_index_by_hw_if_index, hw_if0->hw_if_index); + + arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply); + + arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0]; + + memcpy (arp0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, 6); + clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) = if_addr0->data_u32; + + /* Hardware must be ethernet-like. */ + ASSERT (vec_len (hw_if0->hw_address) == 6); + + memcpy (eth0->dst_address, eth0->src_address, 6); + memcpy (eth0->src_address, hw_if0->hw_address, 6); + + /* Figure out how much to rewind current data from adjacency. 
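+ The reply is sent back out the receiving interface, so the buffer is
+ rewound by the interface adjacency's rewrite size to make room for the
+ L2 header; for an unnumbered interface arp_unnumbered() builds the
+ encapsulation instead.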
*/ + if (ifa0) + { + adj0 = ip_get_adjacency (&ip4_main.lookup_main, + ifa0->neighbor_probe_adj_index); + if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) + { + error0 = ETHERNET_ARP_ERROR_missing_interface_address; + goto drop2; + } + if (is_unnum0) + next0 = arp_unnumbered (p0, pi0, eth0, ifa0); + else + vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next, + n_left_to_next,pi0,next0); + + n_replies_sent += 1; + continue; + + drop1: + if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 || + (arp0->ip4_over_ethernet[0].ip4.as_u32 == + arp0->ip4_over_ethernet[1].ip4.as_u32)) + { + error0 = ETHERNET_ARP_ERROR_gratuitous_arp; + goto drop2; + } + /* See if proxy arp is configured for the address */ + if (is_request0) + { + vnet_sw_interface_t * si; + u32 this_addr = clib_net_to_host_u32 + (arp0->ip4_over_ethernet[1].ip4.as_u32); + u32 fib_index0; + + si = vnet_get_sw_interface (vnm, sw_if_index0); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP)) + goto drop2; + + fib_index0 = vec_elt (im4->fib_index_by_sw_if_index, + sw_if_index0); + + vec_foreach (pa, am->proxy_arps) + { + u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr); + u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr); + + /* an ARP request hit in the proxy-arp table? */ + if ((this_addr >= lo_addr && this_addr <= hi_addr) && + (fib_index0 == pa->fib_index)) + { + eth0 = ethernet_buffer_get_header (p0); + proxy_src.as_u32 = + arp0->ip4_over_ethernet[1].ip4.data_u32; + + /* + * Rewind buffer, direct code above not to + * think too hard about it. + * $$$ is the answer ever anything other than + * vlib_buffer_reset(..)? + */ + ifa0 = 0; + if_addr0 = &proxy_src; + vlib_buffer_reset (p0); + n_proxy_arp_replies_sent++; + goto send_reply; + } + } + } + + drop2: + + next0 = ARP_INPUT_NEXT_DROP; + p0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next, + n_left_to_next,pi0,next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_error_count (vm, node->node_index, + ETHERNET_ARP_ERROR_replies_sent, + n_replies_sent - n_proxy_arp_replies_sent); + + vlib_error_count (vm, node->node_index, + ETHERNET_ARP_ERROR_proxy_arp_replies_sent, + n_proxy_arp_replies_sent); + return frame->n_vectors; +} + +static char * ethernet_arp_error_strings[] = { +#define _(sym,string) string, + foreach_ethernet_arp_error +#undef _ +}; + +VLIB_REGISTER_NODE (arp_input_node,static) = { + .function = arp_input, + .name = "arp-input", + .vector_size = sizeof (u32), + + .n_errors = ETHERNET_ARP_N_ERROR, + .error_strings = ethernet_arp_error_strings, + + .n_next_nodes = ARP_INPUT_N_NEXT, + .next_nodes = { + [ARP_INPUT_NEXT_DROP] = "error-drop", + }, + + .format_buffer = format_ethernet_arp_header, + .format_trace = format_ethernet_arp_input_trace, +}; + +clib_error_t * +ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + vnet_hw_interface_t * hw_if; + + hw_if = vnet_get_hw_interface (vnm, hw_if_index); + + /* Fill in lookup tables with default table (0). 
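+ Each hardware interface gets its own arp-input next index pointing at
+ the interface's output node, so replies can be enqueued straight back
+ out the interface the request arrived on.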
*/ + vec_validate_init_empty (am->arp_input_next_index_by_hw_if_index, hw_if_index, ~0); + am->arp_input_next_index_by_hw_if_index[hw_if_index] + = vlib_node_add_next (vnm->vlib_main, arp_input_node.index, hw_if->output_node_index); + + return 0; +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (ethernet_arp_hw_interface_link_up_down); + +static int +ip4_arp_entry_sort (void *a1, void *a2) +{ + ethernet_arp_ip4_entry_t * e1 = a1; + ethernet_arp_ip4_entry_t * e2 = a2; + + int cmp; + vnet_main_t * vnm = vnet_get_main(); + + cmp = vnet_sw_interface_compare + (vnm, e1->key.sw_if_index, e2->key.sw_if_index); + if (! cmp) + cmp = ip4_address_compare (&e1->key.ip4_address, &e2->key.ip4_address); + return cmp; +} + +static clib_error_t * +show_ip4_arp (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ethernet_arp_main_t * am = ðernet_arp_main; + ethernet_arp_ip4_entry_t * e, * es; + ethernet_proxy_arp_t * pa; + clib_error_t * error = 0; + u32 sw_if_index; + + /* Filter entries by interface if given. */ + sw_if_index = ~0; + (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index); + + es = 0; + pool_foreach (e, am->ip4_entry_pool, ({ vec_add1 (es, e[0]); })); + vec_sort_with_function (es, ip4_arp_entry_sort); + vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0); + vec_foreach (e, es) { + if (sw_if_index != ~0 && e->key.sw_if_index != sw_if_index) + continue; + vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e); + } + vec_free (es); + + if (vec_len (am->proxy_arps)) + { + vlib_cli_output (vm, "Proxy arps enabled for:"); + vec_foreach(pa, am->proxy_arps) + { + vlib_cli_output (vm, "Fib_index %d %U - %U ", + pa->fib_index, + format_ip4_address, &pa->lo_addr, + format_ip4_address, &pa->hi_addr); + } + } + + return error; +} + +VLIB_CLI_COMMAND (show_ip4_arp_command, static) = { + .path = "show ip arp", + .function = show_ip4_arp, + .short_help = "Show ARP table", +}; + +typedef struct { + pg_edit_t l2_type, l3_type; + pg_edit_t n_l2_address_bytes, n_l3_address_bytes; + pg_edit_t opcode; + struct { + pg_edit_t ethernet; + pg_edit_t ip4; + } ip4_over_ethernet[2]; +} pg_ethernet_arp_header_t; + +static inline void +pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f); + _ (l2_type); + _ (l3_type); + _ (n_l2_address_bytes); + _ (n_l3_address_bytes); + _ (opcode); + _ (ip4_over_ethernet[0].ethernet); + _ (ip4_over_ethernet[0].ip4); + _ (ip4_over_ethernet[1].ethernet); + _ (ip4_over_ethernet[1].ip4); +#undef _ +} + +uword +unformat_pg_arp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_ethernet_arp_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t), + &group_index); + pg_ethernet_arp_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet); + pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4); + pg_edit_set_fixed (&p->n_l2_address_bytes, 6); + pg_edit_set_fixed (&p->n_l3_address_bytes, 4); + + if (! 
unformat (input, "%U: %U/%U -> %U/%U", + unformat_pg_edit, + unformat_ethernet_arp_opcode_net_byte_order, &p->opcode, + unformat_pg_edit, + unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet, + unformat_pg_edit, + unformat_ip4_address, &p->ip4_over_ethernet[0].ip4, + unformat_pg_edit, + unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet, + unformat_pg_edit, + unformat_ip4_address, &p->ip4_over_ethernet[1].ip4)) + { + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; + } + return 1; +} + +clib_error_t *ip4_set_arp_limit (u32 arp_limit) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + + am->limit_arp_cache_size = arp_limit; + return 0; +} + +static clib_error_t * ethernet_arp_init (vlib_main_t * vm) +{ + ethernet_arp_main_t * am = ðernet_arp_main; + pg_node_t * pn; + + ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index); + + pn = pg_get_node (arp_input_node.index); + pn->unformat_edit = unformat_pg_arp_header; + + am->opcode_by_name = hash_create_string (0, sizeof (uword)); +#define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o); + foreach_ethernet_arp_opcode; +#undef _ + + mhash_init (&am->ip4_entry_by_key, + /* value size */ sizeof (uword), + /* key size */ sizeof (ethernet_arp_ip4_key_t)); + + /* $$$ configurable */ + am->limit_arp_cache_size = 50000; + + am->pending_resolutions_by_address = hash_create (0, sizeof (uword)); + am->mac_changes_by_address = hash_create (0, sizeof (uword)); + + /* don't trace ARP error packets */ + { + vlib_node_runtime_t *rt = + vlib_node_get_runtime (vm, arp_input_node.index); + +#define _(a,b) \ + vnet_pcap_drop_trace_filter_add_del \ + (rt->errors[ETHERNET_ARP_ERROR_##a], \ + 1 /* is_add */); + foreach_ethernet_arp_error +#undef _ + } + + return 0; +} + +VLIB_INIT_FUNCTION (ethernet_arp_init); + +int +vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, u32 fib_index, + void * a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t * a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.fib_index = fib_index; + args.is_remove = 1; + memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) &args, sizeof (args)); + return 0; +} + +static inline int +vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm, + u32 sw_if_index, + u32 fib_index, + void * a_arg) +{ + ethernet_arp_ip4_entry_t * e; + ethernet_arp_main_t * am = ðernet_arp_main; + ethernet_arp_ip4_over_ethernet_address_t * a = a_arg; + ethernet_arp_ip4_key_t k; + uword * p; + ip4_add_del_route_args_t args; + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 adj_index; + ip_adjacency_t * adj; + + k.sw_if_index = sw_if_index; + k.ip4_address = a->ip4; + k.fib_index = fib_index; + p = mhash_get (&am->ip4_entry_by_key, &k); + if (! p) + return -1; + + memset(&args, 0, sizeof(args)); + + /* + * Make sure that the route actually exists before we try to delete it, + * and make sure that it's a rewrite adjacency. + * + * If we point 1-N unnumbered interfaces at a loopback interface and + * shut down the loopback before shutting down 1-N unnumbered + * interfaces, the ARP cache will still have an entry, + * but the route will have disappeared. + * + * See also ip4_del_interface_routes (...) + * -> ip4_delete_matching_routes (...). + */ + + adj_index = ip4_fib_lookup_with_table + (im, fib_index, &a->ip4, 1 /* disable default route */); + + /* Miss adj? 
Forget it... */ + if (adj_index != lm->miss_adj_index) { + adj = ip_get_adjacency (lm, adj_index); + /* + * Stupid control-plane trick: + * admin down an interface (removes arp routes from fib), + * bring the interface back up (does not reinstall them) + * then remove the arp cache entry (yuck). When that happens, + * the adj we find here will be the interface subnet ARP adj. + */ + if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) { + args.table_index_or_table_id = fib_index; + args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL + | IP4_ROUTE_FLAG_NEIGHBOR; + args.dst_address = a->ip4; + args.dst_address_length = 32; + ip4_add_del_route (im, &args); + ip4_maybe_remap_adjacencies (im, fib_index, args.flags); + } + } + + e = pool_elt_at_index (am->ip4_entry_pool, p[0]); + mhash_unset (&am->ip4_entry_by_key, &e->key, 0); + pool_put (am->ip4_entry_pool, e); + return 0; +} + +static void +increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t *a) +{ + u8 old; + int i; + + for (i = 3; i >= 0; i--) + { + old = a->ip4.as_u8[i]; + a->ip4.as_u8[i] += 1; + if (old < a->ip4.as_u8[i]) + break; + } + + for (i = 5; i >= 0; i--) + { + old = a->ethernet[i]; + a->ethernet[i] += 1; + if (old < a->ethernet[i]) + break; + } +} + +int vnet_proxy_arp_add_del (ip4_address_t *lo_addr, + ip4_address_t *hi_addr, + u32 fib_index, int is_del) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_proxy_arp_t *pa; + u32 found_at_index = ~0; + + vec_foreach (pa, am->proxy_arps) + { + if (pa->lo_addr == lo_addr->as_u32 + && pa->hi_addr == hi_addr->as_u32 + && pa->fib_index == fib_index) + { + found_at_index = pa - am->proxy_arps; + break; + } + } + + if (found_at_index != ~0) + { + /* Delete, otherwise it's already in the table */ + if (is_del) + vec_delete (am->proxy_arps, 1, found_at_index); + return 0; + } + /* delete, no such entry */ + if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* add, not in table */ + vec_add2 (am->proxy_arps, pa, 1); + pa->lo_addr = lo_addr->as_u32; + pa->hi_addr = hi_addr->as_u32; + pa->fib_index = fib_index; + return 0; +} + +/* + * Remove any proxy arp entries asdociated with the + * specificed fib. + */ +int vnet_proxy_arp_fib_reset (u32 fib_id) +{ + ip4_main_t * im = &ip4_main; + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_proxy_arp_t *pa; + u32 * entries_to_delete = 0; + u32 fib_index; + uword * p; + int i; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + fib_index = p[0]; + + vec_foreach (pa, am->proxy_arps) + { + if (pa->fib_index == fib_index) + { + vec_add1 (entries_to_delete, pa - am->proxy_arps); + } + } + + for (i = 0; i < vec_len(entries_to_delete); i++) + { + vec_delete (am->proxy_arps, 1, entries_to_delete[i]); + } + + vec_free (entries_to_delete); + + return 0; +} + +static clib_error_t * +ip_arp_add_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 sw_if_index; + ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr; + int addr_valid = 0; + int is_del = 0; + int count = 1; + u32 fib_index = 0; + u32 fib_id; + int is_static = 0; + int is_proxy = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... 
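increment_ip4_and_mac_address() above advances both halves of the address pair with a byte-wise carry, so 1.2.3.255 steps to 1.2.4.0; a minimal equivalent on a bare 4-byte address, for illustration only:

#include <stdint.h>

static void
ip4_increment (uint8_t a[4])
{
  int i;
  for (i = 3; i >= 0; i--)
    if (++a[i] != 0)       /* stop as soon as a byte does not wrap to 0 */
      break;
}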
*/ + if (unformat (input, "%U %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip4_address, &addr.ip4, + unformat_ethernet_address, &addr.ethernet)) + addr_valid = 1; + + else if (unformat (input, "delete") || unformat (input, "del")) + is_del = 1; + + else if (unformat (input, "static")) + is_static = 1; + + else if (unformat (input, "count %d", &count)) + ; + + else if (unformat (input, "fib-id %d", &fib_id)) + { + ip4_main_t * im = &ip4_main; + uword * p = hash_get (im->fib_index_by_table_id, fib_id); + if (! p) + return clib_error_return (0, "fib ID %d doesn't exist\n", + fib_id); + fib_index = p[0]; + } + + else if (unformat (input, "proxy %U - %U", + unformat_ip4_address, &lo_addr.ip4, + unformat_ip4_address, &hi_addr.ip4)) + is_proxy = 1; + else + break; + } + + if (is_proxy) + { + (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4, + fib_index, is_del); + return 0; + } + + if (addr_valid) + { + int i; + + for (i = 0; i < count; i++) + { + if (is_del == 0) + { + uword event_type, * event_data = 0; + + /* Park the debug CLI until the arp entry is installed */ + vnet_register_ip4_arp_resolution_event + (vnm, &addr.ip4, vlib_current_process(vm), + 1 /* type */, 0 /* data */); + + vnet_arp_set_ip4_over_ethernet + (vnm, sw_if_index, fib_index, &addr, is_static); + + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length(event_data); + if (event_type != 1) + clib_warning ("event type %d unexpected", event_type); + } + else + vnet_arp_unset_ip4_over_ethernet + (vnm, sw_if_index, fib_index, &addr); + + increment_ip4_and_mac_address (&addr); + } + } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = { + .path = "set ip arp", + .short_help = "set ip arp [del] <intfc> <ip-address> <mac-address>", + .function = ip_arp_add_del_command_fn, +}; + +static clib_error_t * +set_int_proxy_arp_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 sw_if_index; + vnet_sw_interface_t * si; + int enable = 0; + int intfc_set = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + intfc_set = 1; + else if (unformat (input, "enable") || unformat (input, "on")) + enable = 1; + else if (unformat (input, "disable") || unformat (input, "off")) + enable = 0; + else + break; + } + + if (intfc_set == 0) + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + + si = vnet_get_sw_interface (vnm, sw_if_index); + ASSERT(si); + if (enable) + si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP; + else + si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP; + + return 0; +} + +VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = { + .path = "set interface proxy-arp", + .short_help = "set interface proxy-arp <intfc> [enable|disable]", + .function = set_int_proxy_arp_command_fn, +}; + + +/* + * ARP Termination in a L2 Bridge Domain based on an + * IP4 to MAC hash table mac_by_ip4 for each BD. 
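Typical debug-CLI invocations of the two commands defined above (interface name and addresses are illustrative only):

  set ip arp GigabitEthernet2/0/0 1.2.3.4 00:01:02:03:04:05 static
  set ip arp del GigabitEthernet2/0/0 1.2.3.4 00:01:02:03:04:05
  set ip arp proxy 10.0.0.1 - 10.0.0.254
  set interface proxy-arp GigabitEthernet2/0/0 enable

With "count N", consecutive address/MAC pairs are installed starting from the given pair.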
+ */ +typedef enum { + ARP_TERM_NEXT_L2_OUTPUT, + ARP_TERM_NEXT_DROP, + ARP_TERM_N_NEXT, +} arp_term_next_t; + +u32 arp_term_next_node_index[32]; + +static uword +arp_term_l2bd (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + l2input_main_t * l2im = &l2input_main; + u32 n_left_from, next_index, * from, * to_next; + u32 n_replies_sent = 0; + u16 last_bd_index = ~0; + l2_bridge_domain_t * last_bd_config = 0; + l2_input_config_t * cfg0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ethernet_header_t * eth0; + ethernet_arp_header_t * arp0; + u8 * l3h0; + u32 pi0, error0, next0, sw_if_index0; + u16 ethertype0; + u16 bd_index0; + u32 ip0; + u8 * macp0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + eth0 = vlib_buffer_get_current (p0); + l3h0 = (u8 *)eth0 + vnet_buffer(p0)->l2.l2_len; + ethertype0 = clib_net_to_host_u16(*(u16 *)(l3h0 - 2)); + arp0 = (ethernet_arp_header_t *) l3h0; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (p0->flags & VLIB_BUFFER_IS_TRACED))) + { + u8 *t0 = vlib_add_trace ( + vm, node, p0, sizeof(ethernet_arp_input_trace_t)); + memcpy (t0, l3h0, sizeof(ethernet_arp_input_trace_t)); + } + + if (PREDICT_FALSE ( + (ethertype0 != ETHERNET_TYPE_ARP) || + (arp0->opcode != clib_host_to_net_u16(ETHERNET_ARP_OPCODE_request)))) + goto next_l2_feature; + + error0 = ETHERNET_ARP_ERROR_replies_sent; + error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) + ? ETHERNET_ARP_ERROR_l2_type_not_ethernet + : error0); + error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4) + ? 
ETHERNET_ARP_ERROR_l3_type_not_ip4 + : error0); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + if (error0) + goto drop; + + // Trash ARP packets whose ARP-level source addresses do not + // match their L2-frame-level source addresses */ + if (PREDICT_FALSE ( + memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet, + sizeof (eth0->src_address)))) + { + error0 = ETHERNET_ARP_ERROR_l2_address_mismatch; + goto drop; + } + + // Check if anyone want ARP request events for L2 BDs + { + pending_resolution_t * mc; + ethernet_arp_main_t * am = ðernet_arp_main; + uword *p = hash_get (am->mac_changes_by_address, 0); + if (p && (vnet_buffer(p0)->l2.shg == 0)) + { // Only SHG 0 interface which is more likely local + u32 next_index = p[0]; + while (next_index != (u32)~0) + { + int (*fp)(u32, u8 *, u32, u32); + int rv = 1; + mc = pool_elt_at_index (am->mac_changes, next_index); + fp = mc->data_callback; + // Call the callback, return 1 to suppress dup events */ + if (fp) rv = (*fp)(mc->data, + arp0->ip4_over_ethernet[0].ethernet, + sw_if_index0, + arp0->ip4_over_ethernet[0].ip4.as_u32); + // Signal the resolver process + if (rv == 0) + vlib_process_signal_event (vm, mc->node_index, + mc->type_opaque, + mc->data); + next_index = mc->next_index; + } + } + } + + // lookup BD mac_by_ip4 hash table for MAC entry + ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32; + bd_index0 = vnet_buffer(p0)->l2.bd_index; + if (PREDICT_FALSE ( + (bd_index0 != last_bd_index) || (last_bd_index == ~0))) + { + last_bd_index = bd_index0; + last_bd_config = vec_elt_at_index(l2im->bd_configs, bd_index0); + } + macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0); + + if (PREDICT_FALSE(!macp0)) + goto next_l2_feature; // MAC not found + + // MAC found, send ARP reply - + // Convert ARP request packet to ARP reply + arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply); + arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0]; + arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0; + memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6); + memcpy (eth0->dst_address, eth0->src_address, 6); + memcpy (eth0->src_address, macp0, 6); + n_replies_sent += 1; + + // For BVI, need to use l2-fwd node to send ARP reply as + // l2-output node cannot output packet to BVI properly + cfg0 = vec_elt_at_index(l2im->configs, sw_if_index0); + if (PREDICT_FALSE (cfg0->bvi)) + { + vnet_buffer(p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD; + vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; + goto next_l2_feature; + } + + // Send ARP reply back out input interface through l2-output + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + next0 = ARP_TERM_NEXT_L2_OUTPUT; + // Note that output to VXLAN tunnel will fail due to SHG which + // is probably desireable since ARP termination is not intended + // for ARP requests from other hosts. 
If output to VXLAN tunnel is + // required, however, can just clear the SHG in packet as follows: + // vnet_buffer(p0)->l2.shg = 0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next, + n_left_to_next,pi0,next0); + continue; + + next_l2_feature: + { + u32 feature_bitmap0 = + vnet_buffer(p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM; + vnet_buffer(p0)->l2.feature_bitmap = feature_bitmap0; + next0 = feat_bitmap_get_next_node_index(arp_term_next_node_index, + feature_bitmap0); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next, + n_left_to_next,pi0,next0); + continue; + } + + drop: + if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 || + (arp0->ip4_over_ethernet[0].ip4.as_u32 == + arp0->ip4_over_ethernet[1].ip4.as_u32)) + { + error0 = ETHERNET_ARP_ERROR_gratuitous_arp; + } + next0 = ARP_TERM_NEXT_DROP; + p0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next, + n_left_to_next,pi0,next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_error_count (vm, node->node_index, + ETHERNET_ARP_ERROR_replies_sent, + n_replies_sent); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (arp_term_l2bd_node,static) = { + .function = arp_term_l2bd, + .name = "arp-term-l2bd", + .vector_size = sizeof (u32), + + .n_errors = ETHERNET_ARP_N_ERROR, + .error_strings = ethernet_arp_error_strings, + + .n_next_nodes = ARP_TERM_N_NEXT, + .next_nodes = { + [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output", + [ARP_TERM_NEXT_DROP] = "error-drop", + }, + + .format_buffer = format_ethernet_arp_header, + .format_trace = format_ethernet_arp_input_trace, +}; + +clib_error_t *arp_term_init (vlib_main_t *vm) +{ // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + arp_term_l2bd_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + arp_term_next_node_index); + return 0; +} + +VLIB_INIT_FUNCTION (arp_term_init); diff --git a/vnet/vnet/ethernet/arp_packet.h b/vnet/vnet/ethernet/arp_packet.h new file mode 100644 index 00000000000..c2214949d89 --- /dev/null +++ b/vnet/vnet/ethernet/arp_packet.h @@ -0,0 +1,134 @@ +/* + * ethernet/arp.c: IP v4 ARP node + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
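The next_l2_feature path in arp-term-l2bd above clears its own feature bit and asks the feature bitmap for the next node; a generic sketch of that dispatch pattern (this is not VPP's actual helper, and the bit-priority order shown is purely illustrative):

#include <stdint.h>

static uint32_t
next_node_from_bitmap (const uint32_t next_by_feature[32], uint32_t feature_bitmap)
{
  /* assumes at least one feature bit remains set; the lowest set bit
     is chosen here only for illustration */
  return next_by_feature[__builtin_ctz (feature_bitmap)];
}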
+ */ + +#ifndef included_ethernet_arp_packet_h +#define included_ethernet_arp_packet_h + +#define foreach_ethernet_arp_hardware_type \ + _ (0, reserved) \ + _ (1, ethernet) \ + _ (2, experimental_ethernet) \ + _ (3, ax_25) \ + _ (4, proteon_pronet_token_ring) \ + _ (5, chaos) \ + _ (6, ieee_802) \ + _ (7, arcnet) \ + _ (8, hyperchannel) \ + _ (9, lanstar) \ + _ (10, autonet) \ + _ (11, localtalk) \ + _ (12, localnet) \ + _ (13, ultra_link) \ + _ (14, smds) \ + _ (15, frame_relay) \ + _ (16, atm) \ + _ (17, hdlc) \ + _ (18, fibre_channel) \ + _ (19, atm19) \ + _ (20, serial_line) \ + _ (21, atm21) \ + _ (22, mil_std_188_220) \ + _ (23, metricom) \ + _ (24, ieee_1394) \ + _ (25, mapos) \ + _ (26, twinaxial) \ + _ (27, eui_64) \ + _ (28, hiparp) \ + _ (29, iso_7816_3) \ + _ (30, arpsec) \ + _ (31, ipsec_tunnel) \ + _ (32, infiniband) \ + _ (33, cai) \ + _ (34, wiegand) \ + _ (35, pure_ip) \ + _ (36, hw_exp1) \ + _ (256, hw_exp2) + +#define foreach_ethernet_arp_opcode \ + _ (reserved) \ + _ (request) \ + _ (reply) \ + _ (reverse_request) \ + _ (reverse_reply) \ + _ (drarp_request) \ + _ (drarp_reply) \ + _ (drarp_error) \ + _ (inarp_request) \ + _ (inarp_reply) \ + _ (arp_nak) \ + _ (mars_request) \ + _ (mars_multi) \ + _ (mars_mserv) \ + _ (mars_join) \ + _ (mars_leave) \ + _ (mars_nak) \ + _ (mars_unserv) \ + _ (mars_sjoin) \ + _ (mars_sleave) \ + _ (mars_grouplist_request) \ + _ (mars_grouplist_reply) \ + _ (mars_redirect_map) \ + _ (mapos_unarp) \ + _ (exp1) \ + _ (exp2) + +typedef enum { +#define _(n,f) ETHERNET_ARP_HARDWARE_TYPE_##f = (n), + foreach_ethernet_arp_hardware_type +#undef _ +} ethernet_arp_hardware_type_t; + +typedef enum { +#define _(f) ETHERNET_ARP_OPCODE_##f, + foreach_ethernet_arp_opcode +#undef _ + ETHERNET_ARP_N_OPCODE, +} ethernet_arp_opcode_t; + +typedef enum { + IP4_ARP_NEXT_DROP, + IP4_ARP_N_NEXT, +} ip4_arp_next_t; + +typedef enum { + IP4_ARP_ERROR_DROP, + IP4_ARP_ERROR_REQUEST_SENT, + IP4_ARP_ERROR_NON_ARP_ADJ, + IP4_ARP_ERROR_REPLICATE_DROP, + IP4_ARP_ERROR_REPLICATE_FAIL +} ip4_arp_error_t; + +typedef CLIB_PACKED (struct { + u8 ethernet[6]; + ip4_address_t ip4; +}) ethernet_arp_ip4_over_ethernet_address_t; + +typedef struct { + u16 l2_type; + u16 l3_type; + u8 n_l2_address_bytes; + u8 n_l3_address_bytes; + u16 opcode; + union { + ethernet_arp_ip4_over_ethernet_address_t ip4_over_ethernet[2]; + + /* Others... */ + u8 data[0]; + }; +} ethernet_arp_header_t; + +#endif /* included_ethernet_arp_packet_h */ diff --git a/vnet/vnet/ethernet/cli.c b/vnet/vnet/ethernet/cli.c new file mode 100644 index 00000000000..cbf240c7f68 --- /dev/null +++ b/vnet/vnet/ethernet/cli.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
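For the IPv4-over-Ethernet case, the header declared above is the classic 28-byte ARP packet; a hypothetical on-the-wire initialization (not part of this change) looks like:

ethernet_arp_header_t req = {
  .l2_type            = clib_host_to_net_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet), /* 1      */
  .l3_type            = clib_host_to_net_u16 (ETHERNET_TYPE_IP4),                   /* 0x0800 */
  .n_l2_address_bytes = 6,
  .n_l3_address_bytes = 4,
  .opcode             = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request),
  /* followed by the two 10-byte (MAC, IPv4) pairs: 8 + 2 * 10 = 28 bytes */
};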
+ */ +/* + * cli.c: ethernet CLI + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + +VLIB_CLI_COMMAND (vlib_cli_ethernet_command, static) = { + .path = "ethernet", + .short_help = "Ethernet commands", +}; + +static clib_error_t * +promiscuous_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 hw_if_index; + u32 flags = ETHERNET_INTERFACE_FLAG_ACCEPT_ALL; + + if (unformat (input, "on %U", + unformat_ethernet_interface, vnm, &hw_if_index)) + { + ethernet_set_flags (vnm, hw_if_index, flags); + } + else if (unformat (input, "off %U", + unformat_ethernet_interface, vnm, &hw_if_index)) + { + flags = 0; + ethernet_set_flags (vnm, hw_if_index, flags); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + return 0; +} + +VLIB_CLI_COMMAND (ethernet_promiscuous_command, static) = { + .path = "ethernet promiscuous", + .short_help = "ethernet promiscuous [on | off] <intfc>", + .function = promiscuous_cmd, +}; + +static clib_error_t * +mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 hw_if_index, mtu; + u32 flags = ETHERNET_INTERFACE_FLAG_MTU; + + if (unformat (input, "%d %U", &mtu, + unformat_ethernet_interface, vnm, &hw_if_index)) + { + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + + if (mtu < ETHERNET_MIN_PACKET_BYTES) + return clib_error_return (0, "Invalid mtu (%d): " + "must be >= min pkt bytes (%d)", mtu, + hi->min_packet_bytes); + + if (mtu > ETHERNET_MAX_PACKET_BYTES) + return clib_error_return (0, "Invalid mtu (%d): must be <= 9216", mtu); + + if (hi->max_packet_bytes != mtu) + { + hi->max_packet_bytes = mtu; + ethernet_set_flags (vnm, hw_if_index, flags); + } + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + return 0; +} + +VLIB_CLI_COMMAND (ethernet_mtu_command, static) = { + .path = "ethernet mtu", + .short_help = "ethernet mtu <64-9216> <intfc>", + .function = mtu_cmd, +}; + +clib_error_t * +ethernet_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (ethernet_cli_init); diff --git a/vnet/vnet/ethernet/error.def b/vnet/vnet/ethernet/error.def new file mode 100644 index 00000000000..36679c0ce1c --- /dev/null +++ b/vnet/vnet/ethernet/error.def @@ -0,0 +1,46 @@ +/* + * 
Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet_error.def: ethernet errors + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +ethernet_error (NONE, PUNT, "no error") +ethernet_error (BAD_LLC_LENGTH, DROP, "llc length > packet length") +ethernet_error (UNKNOWN_TYPE, PUNT, "unknown ethernet type") +ethernet_error (UNKNOWN_VLAN, DROP, "unknown vlan") +ethernet_error (L3_MAC_MISMATCH, DROP, "l3 mac mismatch") +ethernet_error (DOWN, DROP, "subinterface down") + diff --git a/vnet/vnet/ethernet/ethernet.h b/vnet/vnet/ethernet/ethernet.h new file mode 100644 index 00000000000..266e1d79afb --- /dev/null +++ b/vnet/vnet/ethernet/ethernet.h @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet.h: types/functions for ethernet. 
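The error.def entries above form an X-macro file: each consumer defines ethernet_error() before including it. A hedged sketch of the usual companion expansion for the error counter strings (the enum form appears in ethernet.h below):

static char * ethernet_error_strings[] = {
#define ethernet_error(n,c,s) s,
#include <vnet/ethernet/error.def>
#undef ethernet_error
};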
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ethernet_h +#define included_ethernet_h + +#include <vnet/vnet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/pg/pg.h> + +always_inline u64 +ethernet_mac_address_u64 (u8 * a) +{ return (((u64) a[0] << (u64) (5*8)) + | ((u64) a[1] << (u64) (4*8)) + | ((u64) a[2] << (u64) (3*8)) + | ((u64) a[3] << (u64) (2*8)) + | ((u64) a[4] << (u64) (1*8)) + | ((u64) a[5] << (u64) (0*8))); } + +static inline int ethernet_mac_address_is_multicast_u64 (u64 a) +{ + return (a & (1ULL<<(5*8))) != 0; +} + +/* Max. sized ethernet/vlan header for parsing. */ +typedef struct { + ethernet_header_t ethernet; + + /* Allow up to 2 stacked vlan headers. */ + ethernet_vlan_header_t vlan[2]; +} ethernet_max_header_t; + +struct vnet_hw_interface_t; +/* Ethernet flag change callback. */ +typedef u32 (ethernet_flag_change_function_t) +(vnet_main_t * vnm, struct vnet_hw_interface_t * hi, u32 flags); + +#define ETHERNET_MIN_PACKET_BYTES 64 +#define ETHERNET_MAX_PACKET_BYTES 9216 + +/* Ethernet interface instance. */ +typedef struct ethernet_interface { + + /* Accept all packets (promiscuous mode). */ +#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL (1 << 0) +#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags) \ + (((flags) & ~ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) == 0) + + /* Change MTU on interface from hw interface structure */ +#define ETHERNET_INTERFACE_FLAG_MTU (1 << 1) +#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags) \ + ((flags) & ETHERNET_INTERFACE_FLAG_MTU) + + /* Callback, e.g. to turn on/off promiscuous mode */ + ethernet_flag_change_function_t * flag_change; + + u32 driver_instance; + + /* Ethernet (MAC) address for this interface. */ + u8 address[6]; +} ethernet_interface_t; + +vnet_hw_interface_class_t ethernet_hw_interface_class; + +typedef struct { + /* Name (a c string). */ + char * name; + + /* Ethernet type in host byte order. */ + ethernet_type_t type; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. 
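In the packed u64 built by ethernet_mac_address_u64() above, byte a[0] lands in bits 40-47, so the multicast test checks the least-significant bit of the first octet (the IEEE I/G bit). For example (addresses illustrative):

u8 mcast[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };   /* IPv4 multicast MAC */
u8 ucast[6] = { 0x00, 0x1b, 0x21, 0xaa, 0xbb, 0xcc };

ASSERT (ethernet_mac_address_is_multicast_u64 (ethernet_mac_address_u64 (mcast)));
ASSERT (! ethernet_mac_address_is_multicast_u64 (ethernet_mac_address_u64 (ucast)));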
*/ + u32 next_index; +} ethernet_type_info_t; + +typedef enum { +#define ethernet_error(n,c,s) ETHERNET_ERROR_##n, +#include <vnet/ethernet/error.def> +#undef ethernet_error + ETHERNET_N_ERROR, +} ethernet_error_t; + + +// Structs used when parsing packet to find sw_if_index + +typedef struct { + u32 sw_if_index; + u32 flags; + // config entry is-valid flag + // exact match flags (valid if packet has 0/1/2/3 tags) + // L2 vs L3 forwarding mode +#define SUBINT_CONFIG_MATCH_0_TAG (1<<0) +#define SUBINT_CONFIG_MATCH_1_TAG (1<<1) +#define SUBINT_CONFIG_MATCH_2_TAG (1<<2) +#define SUBINT_CONFIG_MATCH_3_TAG (1<<3) +#define SUBINT_CONFIG_VALID (1<<4) +#define SUBINT_CONFIG_L2 (1<<5) + +} subint_config_t; + +always_inline u32 +eth_create_valid_subint_match_flags (u32 num_tags) { + return SUBINT_CONFIG_VALID | (1 << num_tags); +} + + +typedef struct { + subint_config_t untagged_subint; + subint_config_t default_subint; + u16 dot1q_vlans; // pool id for vlan table + u16 dot1ad_vlans; // pool id for vlan table +} main_intf_t; + +typedef struct { + subint_config_t single_tag_subint; + subint_config_t inner_any_subint; + u32 qinqs; // pool id for qinq table +} vlan_intf_t; + +typedef struct { + vlan_intf_t vlans[ETHERNET_N_VLAN]; +} vlan_table_t; + +typedef struct { + subint_config_t subint; +} qinq_intf_t; + +typedef struct { + qinq_intf_t vlans[ETHERNET_N_VLAN]; +} qinq_table_t; + +// Structure mapping to a next index based on ethertype. +// Common ethertypes are stored explicitly, others are +// stored in a sparse table. +typedef struct { + /* Sparse vector mapping ethernet type in network byte order + to next index. */ + u16 * input_next_by_type; + u32 * sparse_index_by_input_next_index; + + /* cached next indexes for common ethertypes */ + u32 input_next_ip4; + u32 input_next_ip6; + u32 input_next_mpls; +} next_by_ethertype_t; + + +typedef struct { + vlib_main_t * vlib_main; + + /* next node index for the L3 input node of each ethertype */ + next_by_ethertype_t l3_next; + + /* next node index for L2 interfaces */ + u32 l2_next; + + /* flag and next node index for L3 redirect */ + u32 redirect_l3; + u32 redirect_l3_next; + + /* Pool of ethernet interface instances. */ + ethernet_interface_t * interfaces; + + ethernet_type_info_t * type_infos; + + /* Hash tables mapping name/type to type info index. */ + uword * type_info_by_name, * type_info_by_type; + + // The root of the vlan parsing tables. A vector with one element + // for each main interface, indexed by hw_if_index. + main_intf_t * main_intfs; + + // Pool of vlan tables + vlan_table_t * vlan_pool; + + // Pool of qinq tables; + qinq_table_t * qinq_pool; + + /* Set to one to use AB.CD.EF instead of A:B:C:D:E:F as ethernet format. */ + int format_ethernet_address_16bit; + +} ethernet_main_t; + +ethernet_main_t ethernet_main; + +always_inline ethernet_type_info_t * +ethernet_get_type_info (ethernet_main_t * em, ethernet_type_t type) +{ + uword * p = hash_get (em->type_info_by_type, type); + return p ? vec_elt_at_index (em->type_infos, p[0]) : 0; +} + +ethernet_interface_t * +ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index); + +clib_error_t * +ethernet_register_interface (vnet_main_t * vnm, + u32 dev_class_index, + u32 dev_instance, + u8 * address, + u32 * hw_if_index_return, + ethernet_flag_change_function_t flag_change); + +void ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index); + +/* Register given node index to take input for given ethernet type. 
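eth_create_valid_subint_match_flags() above builds the pattern that eth_identify_subint() later tests with (subint->flags & match_flags) == match_flags; for instance, for a single-tagged packet:

u32 match_flags = eth_create_valid_subint_match_flags (1);
/* == SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG: a subinterface can
   only match if it is configured (valid) and accepts exactly one tag     */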
*/ +void +ethernet_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index); + +/* Register given node index to take input for packet from L2 interfaces. */ +void +ethernet_register_l2_input (vlib_main_t * vm, + u32 node_index); + +/* Register given node index to take redirected L3 traffic, and enable L3 redirect */ +void +ethernet_register_l3_redirect (vlib_main_t * vm, + u32 node_index); + +/* Formats ethernet address X:X:X:X:X:X */ +u8 * format_ethernet_address (u8 * s, va_list * args); +u8 * format_ethernet_type (u8 * s, va_list * args); +u8 * format_ethernet_header (u8 * s, va_list * args); +u8 * format_ethernet_header_with_length (u8 * s, va_list * args); + +/* Parse ethernet address in either X:X:X:X:X:X unix or X.X.X cisco format. */ +uword +unformat_ethernet_address (unformat_input_t * input, va_list * args); + +/* Parse ethernet type as 0xXXXX or type name from ethernet/types.def. + In either host or network byte order. */ +uword +unformat_ethernet_type_host_byte_order (unformat_input_t * input, + va_list * args); +uword +unformat_ethernet_type_net_byte_order (unformat_input_t * input, + va_list * args); + +/* Parse ethernet header. */ +uword +unformat_ethernet_header (unformat_input_t * input, va_list * args); + +/* Parse ethernet interface name; return hw_if_index. */ +uword unformat_ethernet_interface (unformat_input_t * input, va_list * args); + +uword unformat_pg_ethernet_header (unformat_input_t * input, va_list * args); + +always_inline void +ethernet_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + + n->format_buffer = format_ethernet_header_with_length; + n->unformat_buffer = unformat_ethernet_header; + pn->unformat_edit = unformat_pg_ethernet_header; +} + +always_inline ethernet_header_t * +ethernet_buffer_get_header (vlib_buffer_t * b) +{ + return (void *) + (b->data + + vnet_buffer (b)->ethernet.start_of_ethernet_header); +} + +ethernet_main_t * ethernet_get_main (vlib_main_t * vm); +u32 ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags); +void ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2); +void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 enable); + +int +vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, + u32 fib_index, + void *a_arg, + int is_static); + +int +vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, u32 fib_index, + void * a_arg); + +int vnet_proxy_arp_fib_reset (u32 fib_id); + +clib_error_t * next_by_ethertype_init (next_by_ethertype_t * l3_next); +clib_error_t * next_by_ethertype_register (next_by_ethertype_t * l3_next, + u32 ethertype, + u32 next_index); + +int vnet_create_loopback_interface (u32 * sw_if_indexp, u8 *mac_address); +int vnet_delete_loopback_interface (u32 sw_if_index); + +// Perform ethernet subinterface classification table lookups given +// the ports's sw_if_index and fields extracted from the ethernet header. +// The resulting tables are used by identify_subint(). 
+always_inline void +eth_vlan_table_lookups (ethernet_main_t *em, + vnet_main_t * vnm, + u32 port_sw_if_index0, + u16 first_ethertype, + u16 outer_id, + u16 inner_id, + vnet_hw_interface_t ** hi, + main_intf_t **main_intf, + vlan_intf_t **vlan_intf, + qinq_intf_t **qinq_intf) +{ + vlan_table_t *vlan_table; + qinq_table_t *qinq_table; + u32 vlan_table_id; + + // Read the main, vlan, and qinq interface table entries + // TODO: Consider if/how to prefetch tables. Also consider + // single-entry cache to skip table lookups and identify_subint() + // processing. + *hi = vnet_get_sup_hw_interface (vnm, port_sw_if_index0); + *main_intf = vec_elt_at_index (em->main_intfs, (*hi)->hw_if_index); + + // Always read the vlan and qinq tables, even if there are not that + // many tags on the packet. This makes the lookups and comparisons + // easier (and less branchy). + vlan_table_id = (first_ethertype == ETHERNET_TYPE_DOT1AD) ? + (*main_intf)->dot1ad_vlans : + (*main_intf)->dot1q_vlans; + vlan_table = vec_elt_at_index (em->vlan_pool, vlan_table_id); + *vlan_intf = &vlan_table->vlans[outer_id]; + + qinq_table = vec_elt_at_index (em->qinq_pool, (*vlan_intf)->qinqs); + *qinq_intf = &qinq_table->vlans[inner_id]; +} + + +// Determine the subinterface for this packet, given the result of the +// vlan table lookups and vlan header parsing. Check the most specific +// matches first. +// Returns 1 if a matching subinterface was found, otherwise returns 0. +always_inline u32 +eth_identify_subint (vnet_hw_interface_t * hi, + vlib_buffer_t * b0, + u32 match_flags, + main_intf_t * main_intf, + vlan_intf_t * vlan_intf, + qinq_intf_t * qinq_intf, + u32 * new_sw_if_index, + u8 * error0, + u32 * is_l2) +{ + subint_config_t * subint; + + // Each comparison is checking both the valid flag and the number of tags + // (incorporating exact-match/non-exact-match). 
+ + // check for specific double tag + subint = &qinq_intf->subint; + if ((subint->flags & match_flags) == match_flags) goto matched; + + // check for specific outer and 'any' inner + subint = &vlan_intf->inner_any_subint; + if ((subint->flags & match_flags) == match_flags) goto matched; + + // check for specific single tag + subint = &vlan_intf->single_tag_subint; + if ((subint->flags & match_flags) == match_flags) goto matched; + + // check for untagged interface + subint = &main_intf->untagged_subint; + if ((subint->flags & match_flags) == match_flags) goto matched; + + // check for default interface + subint = &main_intf->default_subint; + if ((subint->flags & match_flags) == match_flags) goto matched; + + // No matching subinterface + *new_sw_if_index = ~0; + *error0 = ETHERNET_ERROR_UNKNOWN_VLAN; + *is_l2 = 0; + return 0; + + matched: + *new_sw_if_index = subint->sw_if_index; + *is_l2 = subint->flags & SUBINT_CONFIG_L2; + return 1; +} + +always_inline ethernet_main_t * +vnet_get_ethernet_main (void) +{ + return ðernet_main; +} + +void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, + void * address_arg, + uword node_index, + uword type_opaque, + uword data); + + +int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, + void * data_callback, + u32 pid, + void * address_arg, + uword node_index, + uword type_opaque, + uword data, int is_add); + +vlib_node_registration_t ethernet_input_node; + +#endif /* included_ethernet_h */ diff --git a/vnet/vnet/ethernet/format.c b/vnet/vnet/ethernet/format.c new file mode 100644 index 00000000000..be456d56d71 --- /dev/null +++ b/vnet/vnet/ethernet/format.c @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet_format.c: ethernet formatting/parsing. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + +u8 * format_ethernet_address (u8 * s, va_list * args) +{ + ethernet_main_t * em = ðernet_main; + u8 * a = va_arg (*args, u8 *); + + if (em->format_ethernet_address_16bit) + return format (s, "%02x%02x.%02x%02x.%02x%02x", + a[0], a[1], a[2], a[3], a[4], a[5]); + else + return format (s, "%02x:%02x:%02x:%02x:%02x:%02x", + a[0], a[1], a[2], a[3], a[4], a[5]); +} + +u8 * format_ethernet_type (u8 * s, va_list * args) +{ + ethernet_type_t type = va_arg (*args, u32); + ethernet_main_t * em = ðernet_main; + ethernet_type_info_t * t = ethernet_get_type_info (em, type); + + if (t) + s = format (s, "%s", t->name); + else + s = format (s, "0x%04x", type); + + return s; +} + +u8 * format_ethernet_header_with_length (u8 * s, va_list * args) +{ + ethernet_max_header_t * m = va_arg (*args, ethernet_max_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + ethernet_main_t * em = ðernet_main; + ethernet_header_t * e = &m->ethernet; + ethernet_vlan_header_t * v; + ethernet_type_t type = clib_net_to_host_u16 (e->type); + u32 n_vlan = 0, i, header_bytes; + uword indent; + + while (type == ETHERNET_TYPE_VLAN + && n_vlan < ARRAY_LEN (m->vlan)) + { + v = m->vlan + n_vlan; + type = clib_net_to_host_u16 (v->type); + n_vlan++; + } + + header_bytes = sizeof (e[0]) + n_vlan * sizeof (v[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "ethernet header truncated"); + + indent = format_get_indent (s); + + s = format (s, "%U: %U -> %U", + format_ethernet_type, type, + format_ethernet_address, e->src_address, + format_ethernet_address, e->dst_address); + + for (i = 0; i < n_vlan; i++) + { + u32 v = clib_net_to_host_u16 (m->vlan[i].priority_cfi_and_id); + u32 vid = (v & 0xfff); + u32 cfi = (v >> 12) & 1; + u32 pri = (v >> 13); + + s = format (s, " vlan %d", vid); + if (pri != 0) + s = format (s, " priority %d", pri); + if (cfi != 0) + s = format (s, " cfi"); + } + + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ethernet_type_info_t * ti; + vlib_node_t * node = 0; + + ti = ethernet_get_type_info (em, type); + if (ti && ti->node_index != ~0) + node = vlib_get_node (em->vlib_main, ti->node_index); + if (node && node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) m + header_bytes, + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * format_ethernet_header (u8 * s, va_list * args) +{ + ethernet_max_header_t * m = va_arg (*args, ethernet_max_header_t *); + return format (s, "%U", format_ethernet_header_with_length, m, 0); +} + +/* Parse X:X:X:X:X:X unix style ethernet address. */ +static uword +unformat_ethernet_address_unix (unformat_input_t * input, va_list * args) +{ + u8 * result = va_arg (*args, u8 *); + u32 i, a[6]; + + if (! unformat (input, "%_%x:%x:%x:%x:%x:%x%_", + &a[0], &a[1], &a[2], &a[3], &a[4], &a[5])) + return 0; + + /* Check range. */ + for (i = 0; i < ARRAY_LEN (a); i++) + if (a[i] >= (1 << 8)) + return 0; + + for (i = 0; i < ARRAY_LEN (a); i++) + result[i] = a[i]; + + return 1; +} + +/* Parse X.X.X cisco style ethernet address. */ +static uword +unformat_ethernet_address_cisco (unformat_input_t * input, va_list * args) +{ + u8 * result = va_arg (*args, u8 *); + u32 i, a[3]; + + if (! unformat (input, "%_%x.%x.%x%_", &a[0], &a[1], &a[2])) + return 0; + + /* Check range. 
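A typical call site for the address formatter above (buffer handling and MAC value are illustrative):

u8 mac[6] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 };
u8 * s = format (0, "%U", format_ethernet_address, mac);
/* yields "00:01:02:03:04:05", or "0001.0203.0405" when
   format_ethernet_address_16bit is set */
vec_free (s);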
*/ + for (i = 0; i < ARRAY_LEN (a); i++) + if (a[i] >= (1 << 16)) + return 0; + + result[0] = (a[0] >> 8) & 0xff; + result[1] = (a[0] >> 0) & 0xff; + result[2] = (a[1] >> 8) & 0xff; + result[3] = (a[1] >> 0) & 0xff; + result[4] = (a[2] >> 8) & 0xff; + result[5] = (a[2] >> 0) & 0xff; + + return 1; +} + +/* Parse ethernet address; accept either unix or style addresses. */ +uword +unformat_ethernet_address (unformat_input_t * input, va_list * args) +{ + u8 * result = va_arg (*args, u8 *); + return (unformat_user (input, unformat_ethernet_address_unix, result) + || unformat_user (input, unformat_ethernet_address_cisco, result)); +} + +/* Returns ethernet type as an int in host byte order. */ +uword +unformat_ethernet_type_host_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + ethernet_main_t * em = ðernet_main; + int type, i; + + /* Numeric type. */ + if (unformat (input, "0x%x", &type) + || unformat (input, "%d", &type)) + { + if (type >= (1 << 16)) + return 0; + *result = type; + return 1; + } + + /* Named type. */ + if (unformat_user (input, unformat_vlib_number_by_name, + em->type_info_by_name, &i)) + { + ethernet_type_info_t * ti = vec_elt_at_index (em->type_infos, i); + *result = ti->type; + return 1; + } + + return 0; +} + +uword +unformat_ethernet_type_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + if (! unformat_user (input, unformat_ethernet_type_host_byte_order, result)) + return 0; + + *result = clib_host_to_net_u16 ((u16) *result); + return 1; +} + +uword +unformat_ethernet_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + ethernet_max_header_t _m, * m = &_m; + ethernet_header_t * e = &m->ethernet; + u16 type; + u32 n_vlan; + + if (! unformat (input, "%U: %U -> %U", + unformat_ethernet_type_host_byte_order, &type, + unformat_ethernet_address, &e->src_address, + unformat_ethernet_address, &e->dst_address)) + return 0; + + n_vlan = 0; + while (unformat (input, "vlan")) + { + u32 id, priority; + + if (! unformat_user (input, unformat_vlib_number, &id) + || id >= ETHERNET_N_VLAN) + return 0; + + if (unformat (input, "priority %d", &priority)) + { + if (priority >= 8) + return 0; + id |= priority << 13; + } + + if (unformat (input, "cfi")) + id |= 1 << 12; + + /* Too many vlans given. */ + if (n_vlan >= ARRAY_LEN (m->vlan)) + return 0; + + m->vlan[n_vlan].priority_cfi_and_id = clib_host_to_net_u16 (id); + n_vlan++; + } + + if (n_vlan == 0) + e->type = clib_host_to_net_u16 (type); + else + { + int i; + + e->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + for (i = 0; i < n_vlan - 1; i++) + m->vlan[i].type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + m->vlan[n_vlan - 1].type = clib_host_to_net_u16 (type); + } + + /* Add header to result. */ + { + void * p; + u32 n_bytes = sizeof (e[0]) + n_vlan * sizeof (m->vlan[0]); + + vec_add2 (*result, p, n_bytes); + memcpy (p, m, n_bytes); + } + + return 1; +} diff --git a/vnet/vnet/ethernet/init.c b/vnet/vnet/ethernet/init.c new file mode 100644 index 00000000000..4ac14e208be --- /dev/null +++ b/vnet/vnet/ethernet/init.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet_init.c: ethernet initialization + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + +/* Global main structure. */ +ethernet_main_t ethernet_main; + +static void add_type (ethernet_main_t * em, + ethernet_type_t type, + char * type_name) +{ + ethernet_type_info_t * ti; + u32 i; + + vec_add2 (em->type_infos, ti, 1); + i = ti - em->type_infos; + + ti->name = type_name; + ti->type = type; + ti->next_index = ti->node_index = ~0; + + hash_set (em->type_info_by_type, type, i); + hash_set_mem (em->type_info_by_name, ti->name, i); +} + +static clib_error_t * ethernet_init (vlib_main_t * vm) +{ + ethernet_main_t * em = ðernet_main; + clib_error_t * error; + + em->vlib_main = vm; + + em->type_info_by_name = hash_create_string (0, sizeof (uword)); + em->type_info_by_type = hash_create (0, sizeof (uword)); + +#define ethernet_type(n,s) add_type (em, ETHERNET_TYPE_##s, #s); +#include "types.def" +#undef ethernet_type + + if ((error = vlib_call_init_function (vm, llc_init))) + return error; + if ((error = vlib_call_init_function (vm, ethernet_input_init))) + return error; + if ((error = vlib_call_init_function (vm, ethernet_cli_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (ethernet_init); + +ethernet_main_t * ethernet_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, ethernet_init); + return ðernet_main; +} + diff --git a/vnet/vnet/ethernet/interface.c b/vnet/vnet/ethernet/interface.c new file mode 100644 index 00000000000..fe7eb76bf46 --- /dev/null +++ b/vnet/vnet/ethernet/interface.c @@ -0,0 +1,462 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet_interface.c: ethernet interfaces + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> + +static uword ethernet_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + vnet_sw_interface_t * sub_sw = vnet_get_sw_interface (vnm, sw_if_index); + vnet_sw_interface_t * sup_sw = vnet_get_sup_sw_interface (vnm, sw_if_index); + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + ethernet_main_t * em = ðernet_main; + ethernet_interface_t * ei; + ethernet_header_t * h = rewrite; + ethernet_type_t type; + uword n_bytes = sizeof (h[0]); + + if (sub_sw != sup_sw) { + if (sub_sw->sub.eth.flags.one_tag) { + n_bytes += sizeof (ethernet_vlan_header_t); + } else if (sub_sw->sub.eth.flags.two_tags) { + n_bytes += 2 * (sizeof (ethernet_vlan_header_t)); + } + // Check for encaps that are not supported for L3 interfaces + if (!(sub_sw->sub.eth.flags.exact_match) || + (sub_sw->sub.eth.flags.default_sub) || + (sub_sw->sub.eth.flags.outer_vlan_id_any) || + (sub_sw->sub.eth.flags.inner_vlan_id_any)) { + return 0; + } + } + + if (n_bytes > max_rewrite_bytes) + return 0; + + switch (l3_type) { +#define _(a,b) case VNET_L3_PACKET_TYPE_##a: type = ETHERNET_TYPE_##b; break + _ (IP4, IP4); + _ (IP6, IP6); + _ (MPLS_UNICAST, MPLS_UNICAST); + _ (MPLS_MULTICAST, MPLS_MULTICAST); + _ (ARP, ARP); +#undef _ + default: + return 0; + } + + ei = pool_elt_at_index (em->interfaces, hw->hw_instance); + memcpy (h->src_address, ei->address, sizeof (h->src_address)); + if (dst_address) + memcpy (h->dst_address, dst_address, sizeof (h->dst_address)); + else + memset (h->dst_address, ~0, sizeof (h->dst_address)); /* broadcast */ + + if (sub_sw->sub.eth.flags.one_tag) { + ethernet_vlan_header_t * outer = (void *) (h + 1); + + h->type = sub_sw->sub.eth.flags.dot1ad ? 
+ clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) : + clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + outer->priority_cfi_and_id = clib_host_to_net_u16 (sub_sw->sub.eth.outer_vlan_id); + outer->type = clib_host_to_net_u16 (type); + + } else if (sub_sw->sub.eth.flags.two_tags) { + ethernet_vlan_header_t * outer = (void *) (h + 1); + ethernet_vlan_header_t * inner = (void *) (outer + 1); + + h->type = sub_sw->sub.eth.flags.dot1ad ? + clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) : + clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + outer->priority_cfi_and_id = clib_host_to_net_u16 (sub_sw->sub.eth.outer_vlan_id); + outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + inner->priority_cfi_and_id = clib_host_to_net_u16 (sub_sw->sub.eth.inner_vlan_id); + inner->type = clib_host_to_net_u16 (type); + + } else { + h->type = clib_host_to_net_u16 (type); + } + + return n_bytes; +} + +VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = { + .name = "Ethernet", + .format_address = format_ethernet_address, + .format_header = format_ethernet_header_with_length, + .unformat_hw_address = unformat_ethernet_address, + .unformat_header = unformat_ethernet_header, + .set_rewrite = ethernet_set_rewrite, +}; + +uword unformat_ethernet_interface (unformat_input_t * input, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + u32 * result = va_arg (*args, u32 *); + u32 hw_if_index; + ethernet_main_t * em = ðernet_main; + ethernet_interface_t * eif; + + if (! unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index)) + return 0; + + eif = ethernet_get_interface (em, hw_if_index); + if (eif) + { + *result = hw_if_index; + return 1; + } + return 0; +} + +clib_error_t * +ethernet_register_interface (vnet_main_t * vnm, + u32 dev_class_index, + u32 dev_instance, + u8 * address, + u32 * hw_if_index_return, + ethernet_flag_change_function_t flag_change) +{ + ethernet_main_t * em = ðernet_main; + ethernet_interface_t * ei; + vnet_hw_interface_t * hi; + clib_error_t * error = 0; + u32 hw_if_index; + + pool_get (em->interfaces, ei); + ei->flag_change = flag_change; + + hw_if_index = vnet_register_interface + (vnm, + dev_class_index, dev_instance, + ethernet_hw_interface_class.index, + ei - em->interfaces); + *hw_if_index_return = hw_if_index; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + + ethernet_setup_node (vnm->vlib_main, hi->output_node_index); + + hi->min_packet_bytes = ETHERNET_MIN_PACKET_BYTES; + hi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; + hi->per_packet_overhead_bytes = + /* preamble */ 8 + /* inter frame gap */ 12; + + /* Standard default ethernet MTU. */ + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1500; + + memcpy (ei->address, address, sizeof (ei->address)); + vec_free (hi->hw_address); + vec_add (hi->hw_address, address, sizeof (ei->address)); + + if (error) + { + pool_put (em->interfaces, ei); + return error; + } + return error; +} + +void +ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index) +{ + ethernet_main_t * em = ðernet_main; + ethernet_interface_t * ei; + vnet_hw_interface_t * hi; + main_intf_t * main_intf; + vlan_table_t * vlan_table; + u32 idx; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + ei = pool_elt_at_index (em->interfaces, hi->hw_instance); + + /* Delete vlan mapping table for dot1q and dot1ad. 
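[Editor's note] ethernet_set_rewrite above lays the rewrite string out as an ethernet header followed by zero, one or two ethernet_vlan_header_t entries, with the innermost type field carrying the L3 ethertype; dot1ad sub-interfaces use ETHERNET_TYPE_DOT1AD as the outer TPID instead of ETHERNET_TYPE_VLAN. The following is a minimal standalone sketch of the single-tag (dot1q) layout, using local struct definitions rather than the vnet headers.

    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>   /* htons */

    typedef struct { uint8_t dst[6], src[6]; uint16_t type; }
      __attribute__ ((packed)) eth_hdr_t;
    typedef struct { uint16_t priority_cfi_and_id, type; }
      __attribute__ ((packed)) vlan_hdr_t;

    /* Build dst-mac | src-mac | 0x8100 | tag | inner ethertype (18 bytes). */
    static size_t
    build_dot1q_rewrite (uint8_t *buf, const uint8_t *src_mac,
                         const uint8_t *dst_mac, uint16_t vlan_id,
                         uint16_t inner_ethertype)
    {
      eth_hdr_t *e = (eth_hdr_t *) buf;
      vlan_hdr_t *v = (vlan_hdr_t *) (e + 1);

      memcpy (e->dst, dst_mac, 6);
      memcpy (e->src, src_mac, 6);
      e->type = htons (0x8100);                  /* dot1q TPID */
      v->priority_cfi_and_id = htons (vlan_id);  /* PCP/CFI zero, 12-bit VID */
      v->type = htons (inner_ethertype);         /* e.g. 0x0800 for IPv4 */
      return sizeof (*e) + sizeof (*v);
    }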
*/ + main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index); + if (main_intf->dot1q_vlans) { + vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans); + for (idx=0; idx<ETHERNET_N_VLAN; idx++ ) { + if (vlan_table->vlans[idx].qinqs) { + pool_put_index(em->qinq_pool, vlan_table->vlans[idx].qinqs); + } + } + pool_put_index(em->vlan_pool, main_intf->dot1q_vlans); + } + if (main_intf->dot1ad_vlans) { + vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans); + for (idx=0; idx<ETHERNET_N_VLAN; idx++ ) { + if (vlan_table->vlans[idx].qinqs) { + pool_put_index(em->qinq_pool, vlan_table->vlans[idx].qinqs); + } + } + pool_put_index(em->vlan_pool, main_intf->dot1ad_vlans); + } + + vnet_delete_hw_interface (vnm, hw_if_index); + pool_put (em->interfaces, ei); +} + +u32 +ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + ethernet_main_t * em = ðernet_main; + vnet_hw_interface_t * hi; + ethernet_interface_t * ei; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + + ASSERT (hi->hw_class_index == ethernet_hw_interface_class.index); + + ei = pool_elt_at_index (em->interfaces, hi->hw_instance); + if (ei->flag_change) + return ei->flag_change (vnm, hi, flags); + return (u32)~0; +} + +#define VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT VNET_INTERFACE_TX_N_NEXT + +/* Echo packets back to ethernet input. */ +static uword +simulated_ethernet_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, n_left_to_next, n_copy, * from, * to_next; + u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; + u32 i; + vlib_buffer_t * b; + + n_left_from = frame->n_vectors; + from = vlib_frame_args (frame); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + n_copy = clib_min (n_left_from, n_left_to_next); + + memcpy (to_next, from, n_copy * sizeof (from[0])); + n_left_to_next -= n_copy; + n_left_from -= n_copy; + for (i = 0; i < n_copy; i++) + { + b = vlib_get_buffer (vm, from[i]); + /* Set up RX and TX indices as if received from a real driver */ + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX]; + vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~0; + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return n_left_from; +} + +static u8 * format_simulated_ethernet_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "loop%d", dev_instance); +} + +VNET_DEVICE_CLASS (ethernet_simulated_device_class) = { + .name = "Loopback", + .format_device_name = format_simulated_ethernet_name, + .tx_function = simulated_ethernet_interface_tx, +}; + +int vnet_create_loopback_interface (u32 * sw_if_indexp, u8 *mac_address) +{ + vnet_main_t * vnm = vnet_get_main(); + vlib_main_t * vm = vlib_get_main(); + clib_error_t * error; + static u32 instance; + u8 address[6]; + u32 hw_if_index; + vnet_hw_interface_t * hw_if; + u32 slot; + int rv = 0; + + ASSERT(sw_if_indexp); + + *sw_if_indexp = (u32)~0; + + memset (address, 0, sizeof (address)); + + /* + * Default MAC address (dead:0000:0000 + instance) is allocated + * if zero mac_address is configured. Otherwise, user-configurable MAC + * address is programmed on the loopback interface. 
+ */ + if (memcmp (address, mac_address, sizeof (address))) + memcpy (address, mac_address, sizeof (address)); + else + { + address[0] = 0xde; + address[1] = 0xad; + address[5] = instance; + } + + error = ethernet_register_interface + (vnm, + ethernet_simulated_device_class.index, + instance++, + address, + &hw_if_index, + /* flag change */ 0); + + if (error) + { + rv = VNET_API_ERROR_INVALID_REGISTRATION; + clib_error_report(error); + return rv; + } + + hw_if = vnet_get_hw_interface (vnm, hw_if_index); + slot = vlib_node_add_named_next_with_slot + (vm, hw_if->tx_node_index, + "ethernet-input", + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + + { + vnet_sw_interface_t * si = vnet_get_hw_sw_interface (vnm, hw_if_index); + *sw_if_indexp = si->sw_if_index; + } + + return 0; +} + +static clib_error_t * +create_simulated_ethernet_interfaces (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int rv; + u32 sw_if_index; + u8 mac_address[6]; + + memset (mac_address, 0, sizeof (mac_address)); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mac %U", unformat_ethernet_address, mac_address)) + ; + else + break; + } + + rv = vnet_create_loopback_interface (&sw_if_index, mac_address); + + if (rv) + return clib_error_return (0, "vnet_create_loopback_interface failed"); + + return 0; +} + +VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = { + .path = "loopback create-interface", + .short_help = "Create Loopback ethernet interface [mac <mac-addr>]", + .function = create_simulated_ethernet_interfaces, +}; + +ethernet_interface_t * +ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index) +{ + vnet_hw_interface_t * i = vnet_get_hw_interface (vnet_get_main(), hw_if_index); + return (i->hw_class_index == ethernet_hw_interface_class.index + ? pool_elt_at_index (em->interfaces, i->hw_instance) + : 0); +} + +int vnet_delete_loopback_interface (u32 sw_if_index) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_sw_interface_t * si; + + if (pool_is_free_index (vnm->interface_main.sw_interfaces, + sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + si = vnet_get_sw_interface (vnm, sw_if_index); + ethernet_delete_interface (vnm, si->hw_if_index); + + return 0; +} + +static clib_error_t * +delete_simulated_ethernet_interfaces (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int rv; + u32 sw_if_index = ~0; + vnet_main_t * vnm = vnet_get_main(); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "intfc %U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "interface not specified"); + + rv = vnet_delete_loopback_interface (sw_if_index); + + if (rv) + return clib_error_return (0, "vnet_delete_loopback_interface failed"); + + return 0; +} + +VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = { + .path = "loopback delete-interface", + .short_help = "Delete Loopback ethernet interface intfc <interface>", + .function = delete_simulated_ethernet_interfaces, +}; diff --git a/vnet/vnet/ethernet/mac_swap.c b/vnet/vnet/ethernet/mac_swap.c new file mode 100644 index 00000000000..6bb7f5b0b02 --- /dev/null +++ b/vnet/vnet/ethernet/mac_swap.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
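[Editor's note] The loopback helpers defined above can also be driven programmatically. The fragment below is a hypothetical snippet assumed to run inside VPP (for example from a plugin init or CLI handler, with the vnet headers available); it mirrors what the "loopback create-interface [mac <mac-addr>]" and "loopback delete-interface intfc <interface>" commands do.

    u8 mac[6] = { 0 };            /* all-zero MAC => default de:ad:... address */
    u32 sw_if_index;
    int rv;

    rv = vnet_create_loopback_interface (&sw_if_index, mac);
    if (rv == 0)
      {
        /* ... use sw_if_index ... */
        rv = vnet_delete_loopback_interface (sw_if_index);
      }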
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/error.h> +#include <vnet/devices/pci/ige.h> +#include <vnet/devices/pci/ixge.h> +#include <vnet/devices/pci/ixgev.h> + +typedef struct { + u32 cached_next_index; + u32 cached_sw_if_index; + + /* Hash table to map sw_if_index to next node index */ + uword * next_node_index_by_sw_if_index; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} mac_swap_main_t; + +typedef struct { + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u32 next_index; +} swap_trace_t; + +/* packet trace format function */ +static u8 * format_swap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + swap_trace_t * t = va_arg (*args, swap_trace_t *); + + s = format (s, "SWAP: dst now %U src now %U sw_if_index %d next_index %d", + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->sw_if_index, + t->next_index); + return s; +} + +#define foreach_hw_driver_next \ + _(IP4) \ + _(IP6) \ + _(ETHERNET) + +mac_swap_main_t mac_swap_main; + +static vlib_node_registration_t mac_swap_node; + +#define foreach_mac_swap_error \ +_(SWAPS, "mac addresses swapped") + +typedef enum { +#define _(sym,str) MAC_SWAP_ERROR_##sym, + foreach_mac_swap_error +#undef _ + MAC_SWAP_N_ERROR, +} mac_swap_error_t; + +static char * mac_swap_error_strings[] = { +#define _(sym,string) string, + foreach_mac_swap_error +#undef _ +}; + +/* + * To drop a pkt and increment one of the previous counters: + * + * set b0->error = error_node->errors[RANDOM_ERROR_SAMPLE]; + * set next0 to a disposition index bound to "error-drop". 
+ * + * To manually increment the specific counter MAC_SWAP_ERROR_SAMPLE: + * + * vlib_node_t *n = vlib_get_node (vm, mac_swap.index); + * u32 node_counter_base_index = n->error_heap_index; + * vlib_error_main_t * em = &vm->error_main; + * em->counters[node_counter_base_index + MAC_SWAP_ERROR_SAMPLE] += 1; + * + */ + +typedef enum { + MAC_SWAP_NEXT_DROP, + MAC_SWAP_N_NEXT, +} mac_swap_next_t; + +static uword +mac_swap_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + mac_swap_next_t next_index; + mac_swap_main_t * msm = &mac_swap_main; + vlib_node_t *n = vlib_get_node (vm, mac_swap_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + uword * p0, * p1; + u64 tmp0a, tmp0b; + u64 tmp1a, tmp1b; + ethernet_header_t * h0, *h1; + + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = msm->cached_next_index; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + next1 = msm->cached_next_index; + + if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index0)) + { + p0 = hash_get (msm->next_node_index_by_sw_if_index, sw_if_index0); + if (p0 == 0) + { + vnet_hw_interface_t *hw0; + + hw0 = vnet_get_sup_hw_interface (msm->vnet_main, + sw_if_index0); + + next0 = vlib_node_add_next (msm->vlib_main, + mac_swap_node.index, + hw0->output_node_index); + hash_set (msm->next_node_index_by_sw_if_index, + sw_if_index0, next0); + } + else + next0 = p0[0]; + msm->cached_sw_if_index = sw_if_index0; + msm->cached_next_index = next0; + next1 = next0; + } + if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index1)) + { + p1 = hash_get (msm->next_node_index_by_sw_if_index, sw_if_index1); + if (p1 == 0) + { + vnet_hw_interface_t *hw1; + + hw1 = vnet_get_sup_hw_interface (msm->vnet_main, + sw_if_index1); + + next1 = vlib_node_add_next (msm->vlib_main, + mac_swap_node.index, + hw1->output_node_index); + hash_set (msm->next_node_index_by_sw_if_index, + sw_if_index1, next1); + } + else + next1 = p1[0]; + msm->cached_sw_if_index = sw_if_index1; + msm->cached_next_index = next1; + } + + em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 2; + + /* reset buffer so we always point at the MAC hdr */ + vlib_buffer_reset (b0); + vlib_buffer_reset (b1); + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + /* Swap 2 x src and dst mac addresses using 8-byte load/stores */ + tmp0a = clib_net_to_host_u64(((u64 *)(h0->dst_address))[0]); + tmp1a = clib_net_to_host_u64(((u64 *)(h1->dst_address))[0]); + tmp0b = 
clib_net_to_host_u64(((u64 *)(h0->src_address))[0]); + tmp1b = clib_net_to_host_u64(((u64 *)(h1->src_address))[0]); + ((u64 *)(h0->dst_address))[0] = clib_host_to_net_u64(tmp0b); + ((u64 *)(h1->dst_address))[0] = clib_host_to_net_u64(tmp1b); + /* Move the ethertype from "b" to "a" */ + tmp0a &= ~(0xFFFF); + tmp1a &= ~(0xFFFF); + tmp0a |= tmp0b & 0xFFFF; + ((u64 *)(h0->src_address))[0] = clib_host_to_net_u64(tmp0a); + tmp1a |= tmp1b & 0xFFFF; + ((u64 *)(h1->src_address))[0] = clib_host_to_net_u64(tmp1a); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + swap_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + memcpy (t->src, h0->src_address, 6); + memcpy (t->dst, h0->dst_address, 6); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + swap_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); + memcpy (t->src, h1->src_address, 6); + memcpy (t->dst, h1->dst_address, 6); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + uword * p0; + u64 tmp0a, tmp0b; + ethernet_header_t * h0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = msm->cached_next_index; + + if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index0)) + { + p0 = hash_get (msm->next_node_index_by_sw_if_index, sw_if_index0); + if (p0 == 0) + { + vnet_hw_interface_t *hw0; + + hw0 = vnet_get_sup_hw_interface (msm->vnet_main, + sw_if_index0); + + next0 = vlib_node_add_next (msm->vlib_main, + mac_swap_node.index, + hw0->output_node_index); + hash_set (msm->next_node_index_by_sw_if_index, + sw_if_index0, next0); + } + else + next0 = p0[0]; + msm->cached_sw_if_index = sw_if_index0; + msm->cached_next_index = next0; + } + + em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 1; + + /* reset buffer so we always point at the MAC hdr */ + vlib_buffer_reset (b0); + h0 = vlib_buffer_get_current (b0); + + /* Exchange src and dst, preserve the ethertype */ + tmp0a = clib_net_to_host_u64(((u64 *)(h0->dst_address))[0]); + tmp0b = clib_net_to_host_u64(((u64 *)(h0->src_address))[0]); + ((u64 *)(h0->dst_address))[0] = clib_host_to_net_u64(tmp0b); + tmp0a &= ~(0xFFFF); + tmp0a |= tmp0b & 0xFFFF; + ((u64 *)(h0->src_address))[0] = clib_host_to_net_u64(tmp0a); + + /* ship it */ + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + swap_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + memcpy (t->src, h0->src_address, 6); + memcpy (t->dst, h0->dst_address, 6); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (mac_swap_node,static) = { + .function = mac_swap_node_fn, + .name = "mac-swap", + .vector_size = sizeof (u32), + .format_trace = format_swap_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(mac_swap_error_strings), + .error_strings = mac_swap_error_strings, + + .n_next_nodes = 
MAC_SWAP_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [MAC_SWAP_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *mac_swap_init (vlib_main_t *vm) +{ + mac_swap_main_t * msm = &mac_swap_main; + + msm->next_node_index_by_sw_if_index = hash_create (0, sizeof (uword)); + msm->cached_next_index = (u32)~0; + msm->cached_sw_if_index = (u32)~0; + msm->vlib_main = vm; + msm->vnet_main = vnet_get_main(); + + /* Driver RX nodes send pkts here... */ +#define _(a) ixge_set_next_node (IXGE_RX_NEXT_##a##_INPUT, "mac-swap"); + foreach_hw_driver_next +#undef _ +#define _(a) ixgev_set_next_node (IXGEV_RX_NEXT_##a##_INPUT, "mac-swap"); + foreach_hw_driver_next +#undef _ +#define _(a) ige_set_next_node (IGE_RX_NEXT_##a##_INPUT, "mac-swap"); + foreach_hw_driver_next +#undef _ + + return 0; +} + +VLIB_INIT_FUNCTION (mac_swap_init); + diff --git a/vnet/vnet/ethernet/node.c b/vnet/vnet/ethernet/node.c new file mode 100644 index 00000000000..9c943992b68 --- /dev/null +++ b/vnet/vnet/ethernet/node.c @@ -0,0 +1,1112 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet_node.c: ethernet packet processing + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
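[Editor's note] The inner loops of mac_swap_node_fn above exchange the 6-byte destination and source MACs with two overlapping 8-byte loads and stores, then patch the low 16 bits so the ethertype survives the overlap. Below is a self-contained host-side sketch of the same arithmetic, with portable big-endian load/store helpers standing in for clib_net_to_host_u64/clib_host_to_net_u64.

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t load_be64 (const uint8_t *p)
    {
      uint64_t v = 0;
      for (int i = 0; i < 8; i++) v = (v << 8) | p[i];
      return v;
    }
    static void store_be64 (uint8_t *p, uint64_t v)
    {
      for (int i = 7; i >= 0; i--) { p[i] = v & 0xff; v >>= 8; }
    }

    int main (void)
    {
      /* 14-byte ethernet header: dst 01..., src 02..., ethertype 0x0800 */
      uint8_t h[14] = { 1,1,1,1,1,1,  2,2,2,2,2,2,  0x08, 0x00 };
      uint64_t a = load_be64 (h);       /* dst MAC + first 2 src bytes */
      uint64_t b = load_be64 (h + 6);   /* src MAC + ethertype */

      store_be64 (h, b);                /* dst <- src; bytes 6-7 briefly hold the ethertype */
      a &= ~(uint64_t) 0xffff;          /* drop the 2 src bytes riding in 'a' */
      a |= b & 0xffff;                  /* carry the original ethertype instead */
      store_be64 (h + 6, a);            /* src <- old dst, ethertype restored */

      for (int i = 0; i < 14; i++) printf ("%02x ", h[i]);
      printf ("\n");  /* prints: 02 02 02 02 02 02 01 01 01 01 01 01 08 00 */
      return 0;
    }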
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/sparse_vec.h> +#include <vnet/l2/l2_bvi.h> + + +#define foreach_ethernet_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") \ + _ (LLC, "llc-input") + +typedef enum { +#define _(s,n) ETHERNET_INPUT_NEXT_##s, + foreach_ethernet_input_next +#undef _ + ETHERNET_INPUT_N_NEXT, +} ethernet_input_next_t; + +typedef struct { + u8 packet_data[32]; +} ethernet_input_trace_t; + +static u8 * format_ethernet_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ethernet_input_trace_t * t = va_arg (*va, ethernet_input_trace_t *); + + s = format (s, "%U", format_ethernet_header, t->packet_data); + + return s; +} + +vlib_node_registration_t ethernet_input_node; + +typedef enum { + ETHERNET_INPUT_VARIANT_ETHERNET, + ETHERNET_INPUT_VARIANT_ETHERNET_TYPE, + ETHERNET_INPUT_VARIANT_VLAN, + ETHERNET_INPUT_VARIANT_NOT_L2, +} ethernet_input_variant_t; + + +// Compare two ethernet macs. Return 1 if they are the same, 0 if different +static_always_inline u32 +eth_mac_equal (u8 * mac1, u8 * mac2) { + return (*((u32 *)(mac1+0)) == *((u32 *)(mac2+0)) && + *((u32 *)(mac1+2)) == *((u32 *)(mac2+2))); +} + + +// Parse the ethernet header to extract vlan tags and innermost ethertype +static_always_inline void +parse_header (ethernet_input_variant_t variant, + vlib_buffer_t * b0, + u16 * type, + u16 * orig_type, + u16 * outer_id, + u16 * inner_id, + u32 * match_flags) { + + if (variant == ETHERNET_INPUT_VARIANT_ETHERNET + || variant == ETHERNET_INPUT_VARIANT_NOT_L2) { + ethernet_header_t * e0; + + e0 = (void *) (b0->data + b0->current_data); + + vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data; + + vlib_buffer_advance (b0, sizeof (e0[0])); + + *type = clib_net_to_host_u16(e0->type); + } else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE) { + // here when prior node was LLC/SNAP processing + u16 * e0; + + e0 = (void *) (b0->data + b0->current_data); + + vlib_buffer_advance (b0, sizeof (e0[0])); + + *type = clib_net_to_host_u16(e0[0]); + } + + // save for distinguishing between dot1q and dot1ad later + *orig_type = *type; + + // default the tags to 0 (used if there is no corresponding tag) + *outer_id = 0; + *inner_id = 0; + + *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG; + + // check for vlan encaps + if ((*type == ETHERNET_TYPE_VLAN) || + (*type == ETHERNET_TYPE_DOT1AD) || + (*type == ETHERNET_TYPE_VLAN_9100) || + (*type == ETHERNET_TYPE_VLAN_9200)) + { + ethernet_vlan_header_t * h0; + u16 tag; + + *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG; + + h0 = (void *) (b0->data + b0->current_data); + + tag = clib_net_to_host_u16 (h0->priority_cfi_and_id); + + *outer_id = tag & 0xfff; + + *type = clib_net_to_host_u16(h0->type); + + vlib_buffer_advance (b0, sizeof (h0[0])); + + if (*type == ETHERNET_TYPE_VLAN) { + // Double tagged packet + *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG; + + h0 = (void *) (b0->data + b0->current_data); + + tag = clib_net_to_host_u16 (h0->priority_cfi_and_id); + + *inner_id = tag & 0xfff; + + *type = clib_net_to_host_u16(h0->type); + + vlib_buffer_advance (b0, sizeof (h0[0])); + + if (*type == ETHERNET_TYPE_VLAN) { + // More than double tagged packet + *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG; + } + } + } +} + +// Determine the subinterface for this packet, given 
the result of the +// vlan table lookups and vlan header parsing. Check the most specific +// matches first. +static_always_inline void +identify_subint (vnet_hw_interface_t * hi, + vlib_buffer_t * b0, + u32 match_flags, + main_intf_t * main_intf, + vlan_intf_t * vlan_intf, + qinq_intf_t * qinq_intf, + u32 * new_sw_if_index, + u8 * error0, + u32 * is_l2) +{ + u32 matched; + + matched = eth_identify_subint (hi, b0, match_flags, + main_intf, vlan_intf, qinq_intf, + new_sw_if_index, error0, is_l2); + + if (matched) { + + // Perform L3 my-mac filter + // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac. + // This is required for promiscuous mode, else we will forward packets we aren't supposed to. + if (!(*is_l2)) { + ethernet_header_t * e0; + e0 = (void *) (b0->data + vnet_buffer (b0)->ethernet.start_of_ethernet_header); + + if (!(ethernet_address_cast(e0->dst_address))) { + if (!eth_mac_equal((u8 *)e0, hi->hw_address)) { + *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; + } + } + } + + // Check for down subinterface + *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN; + } +} + +static_always_inline void +determine_next_node (ethernet_main_t * em, + ethernet_input_variant_t variant, + u32 is_l20, + u32 type0, + vlib_buffer_t * b0, + u8 * error0, + u8 * next0) +{ + if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE)) { + // some error occurred + *next0 = ETHERNET_INPUT_NEXT_DROP; + } else if (is_l20) { + *next0 = em->l2_next; + // record the L2 len and reset the buffer so the L2 header is preserved + vnet_buffer(b0)->l2.l2_len = b0->current_data; + vlib_buffer_advance (b0, -(b0->current_data)); + + // check for common IP/MPLS ethertypes + } else if (type0 == ETHERNET_TYPE_IP4) { + *next0 = em->l3_next.input_next_ip4; + } else if (type0 == ETHERNET_TYPE_IP6) { + *next0 = em->l3_next.input_next_ip6; + } else if (type0 == ETHERNET_TYPE_MPLS_UNICAST) { + *next0 = em->l3_next.input_next_mpls; + + } else if (em->redirect_l3) { + // L3 Redirect is on, the cached common next nodes will be + // pointing to the redirect node, catch the uncommon types here + *next0 = em->redirect_l3_next; + } else { + // uncommon ethertype, check table + u32 i0; + i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0); + *next0 = vec_elt (em->l3_next.input_next_by_type, i0); + *error0 = i0 == SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0; + + // The table is not populated with LLC values, so check that now. + // If variant is variant_ethernet then we came from LLC processing. Don't + // go back there; drop instead using by keeping the drop/bad table result. 
+ if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET)) { + *next0 = ETHERNET_INPUT_NEXT_LLC; + } + } +} + +static_always_inline uword +ethernet_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + ethernet_input_variant_t variant) +{ + vnet_main_t * vnm = vnet_get_main(); + ethernet_main_t * em = ðernet_main; + vlib_node_runtime_t * error_node; + u32 n_left_from, next_index, * from, * to_next; + u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; + u32 cpu_index = os_get_cpu_number(); + + if (variant != ETHERNET_INPUT_VARIANT_ETHERNET) + error_node = vlib_node_get_runtime (vm, ethernet_input_node.index); + else + error_node = node; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (ethernet_input_trace_t)); + + next_index = node->cached_next_index; + stats_sw_if_index = node->runtime_data[0]; + stats_n_packets = stats_n_bytes = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u8 next0, next1, error0, error1; + u16 type0, orig_type0, type1, orig_type1; + u16 outer_id0, inner_id0, outer_id1, inner_id1; + u32 match_flags0, match_flags1; + u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1, new_sw_if_index1, len1; + vnet_hw_interface_t * hi0, * hi1; + main_intf_t * main_intf0, * main_intf1; + vlan_intf_t * vlan_intf0, * vlan_intf1; + qinq_intf_t * qinq_intf0, * qinq_intf1; + u32 is_l20, is_l21; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * b2, * b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, STORE); + vlib_prefetch_buffer_header (b3, STORE); + + CLIB_PREFETCH (b2->data, sizeof (ethernet_header_t), LOAD); + CLIB_PREFETCH (b3->data, sizeof (ethernet_header_t), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + error0 = error1 = ETHERNET_ERROR_NONE; + + parse_header (variant, + b0, + &type0, + &orig_type0, + &outer_id0, + &inner_id0, + &match_flags0); + + parse_header (variant, + b1, + &type1, + &orig_type1, + &outer_id1, + &inner_id1, + &match_flags1); + + old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + eth_vlan_table_lookups (em, + vnm, + old_sw_if_index0, + orig_type0, + outer_id0, + inner_id0, + &hi0, + &main_intf0, + &vlan_intf0, + &qinq_intf0); + + eth_vlan_table_lookups (em, + vnm, + old_sw_if_index1, + orig_type1, + outer_id1, + inner_id1, + &hi1, + &main_intf1, + &vlan_intf1, + &qinq_intf1); + + identify_subint (hi0, + b0, + match_flags0, + main_intf0, + vlan_intf0, + qinq_intf0, + &new_sw_if_index0, + &error0, + &is_l20); + + identify_subint (hi1, + b1, + match_flags1, + main_intf1, + vlan_intf1, + qinq_intf1, + &new_sw_if_index1, + &error1, + &is_l21); + + // Save RX sw_if_index for later nodes + vnet_buffer (b0)->sw_if_index[VLIB_RX] = error0 != ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = error1 != ETHERNET_ERROR_NONE ? 
old_sw_if_index1 : new_sw_if_index1; + + // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1) + if (((new_sw_if_index0 != ~0) && (new_sw_if_index0 != old_sw_if_index0)) || + ((new_sw_if_index1 != ~0) && (new_sw_if_index1 != old_sw_if_index1))) { + + len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data + - vnet_buffer (b0)->ethernet.start_of_ethernet_header; + len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data + - vnet_buffer (b1)->ethernet.start_of_ethernet_header; + + stats_n_packets += 2; + stats_n_bytes += len0 + len1; + + if (PREDICT_FALSE (! (new_sw_if_index0 == stats_sw_if_index && new_sw_if_index1 == stats_sw_if_index))) + { + stats_n_packets -= 2; + stats_n_bytes -= len0 + len1; + + if (new_sw_if_index0 != old_sw_if_index0 && new_sw_if_index0 != ~0) + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + new_sw_if_index0, + 1, + len0); + if (new_sw_if_index1 != old_sw_if_index1 && new_sw_if_index1 != ~0) + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + new_sw_if_index1, + 1, + len1); + + if (new_sw_if_index0 == new_sw_if_index1) + { + if (stats_n_packets > 0) + { + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + stats_sw_if_index, + stats_n_packets, + stats_n_bytes); + stats_n_packets = stats_n_bytes = 0; + } + stats_sw_if_index = new_sw_if_index0; + } + } + } + + if (variant == ETHERNET_INPUT_VARIANT_NOT_L2) + is_l20 = is_l21 = 0; + + determine_next_node(em, variant, is_l20, type0, b0, &error0, &next0); + determine_next_node(em, variant, is_l21, type1, b1, &error1, &next1); + + b0->error = error_node->errors[error0]; + b1->error = error_node->errors[error1]; + + // verify speculative enqueue + vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u8 error0, next0; + u16 type0, orig_type0; + u16 outer_id0, inner_id0; + u32 match_flags0; + u32 old_sw_if_index0, new_sw_if_index0, len0; + vnet_hw_interface_t * hi0; + main_intf_t * main_intf0; + vlan_intf_t * vlan_intf0; + qinq_intf_t * qinq_intf0; + u32 is_l20; + + // Prefetch next iteration + if (n_left_from > 1) { + vlib_buffer_t * p2; + + p2 = vlib_get_buffer (vm, from[1]); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + error0 = ETHERNET_ERROR_NONE; + + parse_header (variant, + b0, + &type0, + &orig_type0, + &outer_id0, + &inner_id0, + &match_flags0); + + old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + eth_vlan_table_lookups (em, + vnm, + old_sw_if_index0, + orig_type0, + outer_id0, + inner_id0, + &hi0, + &main_intf0, + &vlan_intf0, + &qinq_intf0); + + identify_subint (hi0, + b0, + match_flags0, + main_intf0, + vlan_intf0, + qinq_intf0, + &new_sw_if_index0, + &error0, + &is_l20); + + // Save RX sw_if_index for later nodes + vnet_buffer (b0)->sw_if_index[VLIB_RX] = error0 != ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0; + + // Increment subinterface stats + // Note that interface-level counters have already been incremented + // prior to calling this function. 
Thus only subinterface counters + // are incremented here. + // + // Interface level counters include packets received on the main + // interface and all subinterfaces. Subinterface level counters + // include only those packets received on that subinterface + // Increment stats if the subint is valid and it is not the main intf + if ((new_sw_if_index0 != ~0) && (new_sw_if_index0 != old_sw_if_index0)) { + + len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data + - vnet_buffer (b0)->ethernet.start_of_ethernet_header; + + stats_n_packets += 1; + stats_n_bytes += len0; + + // Batch stat increments from the same subinterface so counters + // don't need to be incremented for every packet. + if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index)) { + stats_n_packets -= 1; + stats_n_bytes -= len0; + + if (new_sw_if_index0 != ~0) + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + new_sw_if_index0, + 1, + len0); + if (stats_n_packets > 0) { + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + stats_sw_if_index, + stats_n_packets, + stats_n_bytes); + stats_n_packets = stats_n_bytes = 0; + } + stats_sw_if_index = new_sw_if_index0; + } + } + + if (variant == ETHERNET_INPUT_VARIANT_NOT_L2) + is_l20 = 0; + + determine_next_node(em, variant, is_l20, type0, b0, &error0, &next0); + + b0->error = error_node->errors[error0]; + + // verify speculative enqueue + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + // Increment any remaining batched stats + if (stats_n_packets > 0) + { + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + stats_sw_if_index, + stats_n_packets, + stats_n_bytes); + node->runtime_data[0] = stats_sw_if_index; + } + + return from_frame->n_vectors; +} + +static uword +ethernet_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ return ethernet_input_inline (vm, node, from_frame, ETHERNET_INPUT_VARIANT_ETHERNET); } + +static uword +ethernet_input_type (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ return ethernet_input_inline (vm, node, from_frame, ETHERNET_INPUT_VARIANT_ETHERNET_TYPE); } + +static uword +ethernet_input_not_l2 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ return ethernet_input_inline (vm, node, from_frame, ETHERNET_INPUT_VARIANT_NOT_L2); } + + +// Return the subinterface config struct for the given sw_if_index +// Also return via parameter the appropriate match flags for the +// configured number of tags. +// On error (unsupported or not ethernet) return 0. 
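[Editor's note] The loop above batches sub-interface RX counters: packets and bytes destined for the same sw_if_index accumulate in stats_n_packets/stats_n_bytes, and vlib_increment_combined_counter is only called when the sub-interface changes or the frame ends. A standalone sketch of that batching pattern over a made-up packet list:

    #include <stdint.h>
    #include <stdio.h>

    static void flush (uint32_t if_index, uint32_t pkts, uint64_t bytes)
    { printf ("if %u: +%u pkts, +%llu bytes\n",
              if_index, pkts, (unsigned long long) bytes); }

    int main (void)
    {
      struct { uint32_t if_index, len; } pkts[] =
        { {5, 64}, {5, 128}, {7, 64}, {7, 64}, {5, 1500} };
      uint32_t stats_if = ~0u, n = 0;
      uint64_t bytes = 0;

      for (unsigned i = 0; i < sizeof (pkts) / sizeof (pkts[0]); i++)
        {
          if (pkts[i].if_index != stats_if)
            {
              if (n) flush (stats_if, n, bytes);  /* interface changed: flush batch */
              stats_if = pkts[i].if_index;
              n = 0;
              bytes = 0;
            }
          n += 1;
          bytes += pkts[i].len;
        }
      if (n) flush (stats_if, n, bytes);          /* flush the final batch */
      return 0;
    }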
+static subint_config_t * +ethernet_sw_interface_get_config (vnet_main_t * vnm, + u32 sw_if_index, + u32 * flags, + u32 * unsupported) { + ethernet_main_t * em = ðernet_main; + vnet_hw_interface_t * hi; + vnet_sw_interface_t * si; + main_intf_t * main_intf; + vlan_table_t * vlan_table; + qinq_table_t * qinq_table; + subint_config_t * subint = 0; + + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + + if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) { + *unsupported = 0; + goto done; // non-ethernet interface + } + + // ensure there's an entry for the main intf (shouldn't really be necessary) + vec_validate (em->main_intfs, hi->hw_if_index); + main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index); + + // Locate the subint for the given ethernet config + si = vnet_get_sw_interface (vnm, sw_if_index); + + if (si->sub.eth.flags.default_sub) { + subint = &main_intf->default_subint; + *flags = SUBINT_CONFIG_MATCH_0_TAG | + SUBINT_CONFIG_MATCH_1_TAG | + SUBINT_CONFIG_MATCH_2_TAG | + SUBINT_CONFIG_MATCH_3_TAG; + } else if ((si->sub.eth.flags.no_tags) || + (si->sub.eth.raw_flags == 0)) { + // if no flags are set then this is a main interface + // so treat as untagged + subint = &main_intf->untagged_subint; + *flags = SUBINT_CONFIG_MATCH_0_TAG; + } else { + // one or two tags + // first get the vlan table + if (si->sub.eth.flags.dot1ad) { + if (main_intf->dot1ad_vlans == 0) { + // Allocate a vlan table from the pool + pool_get(em->vlan_pool, vlan_table); + main_intf->dot1ad_vlans = vlan_table - em->vlan_pool; + } else { + // Get ptr to existing vlan table + vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans); + } + } else { // dot1q + if (main_intf->dot1q_vlans == 0) { + // Allocate a vlan table from the pool + pool_get(em->vlan_pool, vlan_table); + main_intf->dot1q_vlans = vlan_table - em->vlan_pool; + } else { + // Get ptr to existing vlan table + vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans); + } + } + + if (si->sub.eth.flags.one_tag) { + *flags = si->sub.eth.flags.exact_match ? + SUBINT_CONFIG_MATCH_1_TAG : + (SUBINT_CONFIG_MATCH_1_TAG | + SUBINT_CONFIG_MATCH_2_TAG | + SUBINT_CONFIG_MATCH_3_TAG); + + if (si->sub.eth.flags.outer_vlan_id_any) { + // not implemented yet + *unsupported =1; + goto done; + } else { + // a single vlan, a common case + subint = &vlan_table->vlans[si->sub.eth.outer_vlan_id].single_tag_subint; + } + + } else { + // Two tags + *flags = si->sub.eth.flags.exact_match ? 
+ SUBINT_CONFIG_MATCH_2_TAG : + (SUBINT_CONFIG_MATCH_2_TAG | + SUBINT_CONFIG_MATCH_3_TAG); + + if (si->sub.eth.flags.outer_vlan_id_any && si->sub.eth.flags.inner_vlan_id_any) { + // not implemented yet + *unsupported = 1; + goto done; + } + + if (si->sub.eth.flags.inner_vlan_id_any) { + // a specific outer and "any" inner + // don't need a qinq table for this + subint = &vlan_table->vlans[si->sub.eth.outer_vlan_id].inner_any_subint; + if (si->sub.eth.flags.exact_match) { + *flags = SUBINT_CONFIG_MATCH_2_TAG; + } else { + *flags = SUBINT_CONFIG_MATCH_2_TAG | + SUBINT_CONFIG_MATCH_3_TAG; + } + } else { + // a specific outer + specifc innner vlan id, a common case + + // get the qinq table + if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0) { + // Allocate a qinq table from the pool + pool_get(em->qinq_pool, qinq_table); + vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs = qinq_table - em->qinq_pool; + } else { + // Get ptr to existing qinq table + qinq_table = vec_elt_at_index (em->qinq_pool, vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs); + } + subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint; + } + } + } + + done: + return subint; +} + +clib_error_t * +ethernet_sw_interface_up_down (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + subint_config_t * subint; + u32 dummy_flags; + u32 dummy_unsup; + clib_error_t * error = 0; + + // Find the config for this subinterface + subint = ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags, &dummy_unsup); + + if (subint == 0) { + // not implemented yet or not ethernet + goto done; + } + + subint->sw_if_index = + ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0); + + done: + return error; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down); + + +// Set the L2/L3 mode for the subinterface +void +ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, + u32 sw_if_index, + u32 l2) +{ + subint_config_t *subint; + u32 dummy_flags; + u32 dummy_unsup; + int is_port; + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, sw_if_index); + + is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB); + + // Find the config for this subinterface + subint = ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags, &dummy_unsup); + + if (subint == 0) { + // unimplemented or not ethernet + goto done; + } + + // Double check that the config we found is for our interface (or the interface is down) + ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0)); + + if (l2) { + subint->flags |= SUBINT_CONFIG_L2; + if (is_port) + subint->flags |= + SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG + | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG; + } else { + subint->flags &= ~SUBINT_CONFIG_L2; + if (is_port) + subint->flags &= + ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG + | SUBINT_CONFIG_MATCH_3_TAG); + } + + done: + return; +} + + +static clib_error_t * +ethernet_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_create) +{ + clib_error_t * error = 0; + subint_config_t *subint; + u32 match_flags; + u32 unsupported; + + // Find the config for this subinterface + subint = ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags, &unsupported); + + if (subint == 0) { + // not implemented yet or not ethernet + if (unsupported) { + // this is the NYI case + error = clib_error_return (0, "not implemented yet"); + } + goto done; + } + + if (!is_create) { + subint->flags = 0; + return error; + } + + // Initialize the 
subint + if (subint->flags & SUBINT_CONFIG_VALID) { + // Error vlan already in use + error = clib_error_return (0, "vlan is already in use"); + } else { + // Note that config is L3 by defaulty + subint->flags = SUBINT_CONFIG_VALID | match_flags; + subint->sw_if_index = ~0; // because interfaces are initially down + } + + done: + return error; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del); + +static char * ethernet_error_strings[] = { +#define ethernet_error(n,c,s) s, +#include "error.def" +#undef ethernet_error +}; + +VLIB_REGISTER_NODE (ethernet_input_node) = { + .function = ethernet_input, + .name = "ethernet-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = ETHERNET_N_ERROR, + .error_strings = ethernet_error_strings, + + .n_next_nodes = ETHERNET_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n, + foreach_ethernet_input_next +#undef _ + }, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_ethernet_input_trace, + .unformat_buffer = unformat_ethernet_header, +}; + +VLIB_REGISTER_NODE (ethernet_input_type_node,static) = { + .function = ethernet_input_type, + .name = "ethernet-input-type", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_next_nodes = ETHERNET_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n, + foreach_ethernet_input_next +#undef _ + }, +}; + +VLIB_REGISTER_NODE (ethernet_input_not_l2_node,static) = { + .function = ethernet_input_not_l2, + .name = "ethernet-input-not-l2", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_next_nodes = ETHERNET_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n, + foreach_ethernet_input_next +#undef _ + }, +}; + +void ethernet_set_rx_redirect (vnet_main_t * vnm, + vnet_hw_interface_t * hi, + u32 enable) +{ + // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets + // don't go directly to ip4-input) + vnet_hw_interface_rx_redirect_to_node + (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0); +} + + +/* + * Initialization and registration for the next_by_ethernet structure + */ + +clib_error_t * next_by_ethertype_init (next_by_ethertype_t * l3_next) +{ + l3_next->input_next_by_type = sparse_vec_new + (/* elt bytes */ sizeof (l3_next->input_next_by_type[0]), + /* bits in index */ BITS (((ethernet_header_t *) 0)->type)); + + vec_validate (l3_next->sparse_index_by_input_next_index, ETHERNET_INPUT_NEXT_DROP); + vec_validate (l3_next->sparse_index_by_input_next_index, ETHERNET_INPUT_NEXT_PUNT); + l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] + = SPARSE_VEC_INVALID_INDEX; + l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] + = SPARSE_VEC_INVALID_INDEX; + + return 0; +} + +// Add an ethertype -> next index mapping to the structure +clib_error_t * next_by_ethertype_register (next_by_ethertype_t * l3_next, + u32 ethertype, + u32 next_index) +{ + u32 i; + u16 * n; + ethernet_main_t * em = ðernet_main; + + /* Setup ethernet type -> next index sparse vector mapping. */ + n = sparse_vec_validate (l3_next->input_next_by_type, ethertype); + n[0] = next_index; + + /* Rebuild next index -> sparse index inverse mapping when sparse vector + is updated. 
*/ + vec_validate (l3_next->sparse_index_by_input_next_index, next_index); + for (i = 1; i < vec_len (l3_next->input_next_by_type); i++) + l3_next->sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i; + + // do not allow the cached next index's to be updated if L3 + // redirect is enabled, as it will have overwritten them + if (!em->redirect_l3) { + // Cache common ethertypes directly + if (ethertype == ETHERNET_TYPE_IP4) { + l3_next->input_next_ip4 = next_index; + } else if (ethertype == ETHERNET_TYPE_IP6) { + l3_next->input_next_ip6 = next_index; + } else if (ethertype == ETHERNET_TYPE_MPLS_UNICAST) { + l3_next->input_next_mpls = next_index; + } + } + return 0; +} + + +static clib_error_t * ethernet_input_init (vlib_main_t * vm) +{ + ethernet_main_t * em = ðernet_main; + __attribute__((unused)) vlan_table_t * invalid_vlan_table; + __attribute__((unused)) qinq_table_t * invalid_qinq_table; + + ethernet_setup_node (vm, ethernet_input_node.index); + ethernet_setup_node (vm, ethernet_input_type_node.index); + ethernet_setup_node (vm, ethernet_input_not_l2_node.index); + + next_by_ethertype_init (&em->l3_next); + + // Initialize pools and vector for vlan parsing + vec_validate (em->main_intfs, 10); // 10 main interfaces + pool_alloc(em->vlan_pool, 10); + pool_alloc(em->qinq_pool, 1); + + // The first vlan pool will always be reserved for an invalid table + pool_get(em->vlan_pool, invalid_vlan_table); // first id = 0 + // The first qinq pool will always be reserved for an invalid table + pool_get(em->qinq_pool, invalid_qinq_table); // first id = 0 + + return 0; +} + +VLIB_INIT_FUNCTION (ethernet_input_init); + +void +ethernet_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index) +{ + ethernet_main_t * em = ðernet_main; + ethernet_type_info_t * ti; + u32 i; + + { + clib_error_t * error = vlib_call_init_function (vm, ethernet_init); + if (error) + clib_error_report (error); + } + + ti = ethernet_get_type_info (em, type); + ti->node_index = node_index; + ti->next_index = vlib_node_add_next (vm, + ethernet_input_node.index, + node_index); + i = vlib_node_add_next (vm, + ethernet_input_type_node.index, + node_index); + ASSERT (i == ti->next_index); + + i = vlib_node_add_next (vm, + ethernet_input_not_l2_node.index, + node_index); + ASSERT (i == ti->next_index); + + // Add the L3 node for this ethertype to the next nodes structure + next_by_ethertype_register (&em->l3_next, type, ti->next_index); + + // Call the registration functions for other nodes that want a mapping + l2bvi_register_input_type (vm, type, node_index); +} + +void +ethernet_register_l2_input (vlib_main_t * vm, + u32 node_index) +{ + ethernet_main_t * em = ðernet_main; + u32 i; + + em->l2_next = vlib_node_add_next (vm, ethernet_input_node.index, node_index); + + /* + * Even if we never use these arcs, we have to align the next indices... 
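[Editor's note] ethernet_register_input_type above is how a protocol node attaches itself to ethernet-input for a given ethertype. The fragment below is a hypothetical registration, assuming the vnet headers and a node registration named my_proto_input_node; ETHERNET_TYPE_SOMEPROTO is a placeholder for an ethertype that actually appears in types.def, since the lookup inside ethernet_register_input_type requires it.

    /* Hypothetical protocol node, declared elsewhere in the same image. */
    extern vlib_node_registration_t my_proto_input_node;

    static clib_error_t *
    my_proto_init (vlib_main_t * vm)
    {
      /* Frames carrying this ethertype are handed to "my-proto-input". */
      ethernet_register_input_type (vm, ETHERNET_TYPE_SOMEPROTO,
                                    my_proto_input_node.index);
      return 0;
    }

    VLIB_INIT_FUNCTION (my_proto_init);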
+ */ + i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index); + + ASSERT (i == em->l2_next); + + i = vlib_node_add_next (vm, + ethernet_input_not_l2_node.index, + node_index); + ASSERT (i == em->l2_next); +} + +// Register a next node for L3 redirect, and enable L3 redirect +void +ethernet_register_l3_redirect (vlib_main_t * vm, + u32 node_index) +{ + ethernet_main_t * em = ðernet_main; + u32 i; + + em->redirect_l3 = 1; + em->redirect_l3_next = vlib_node_add_next(vm, + ethernet_input_node.index, + node_index); + /* + * Change the cached next nodes to the redirect node + */ + em->l3_next.input_next_ip4 = em->redirect_l3_next; + em->l3_next.input_next_ip6 = em->redirect_l3_next; + em->l3_next.input_next_mpls = em->redirect_l3_next; + + /* + * Even if we never use these arcs, we have to align the next indices... + */ + i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index); + + ASSERT (i == em->redirect_l3_next); +} diff --git a/vnet/vnet/ethernet/packet.h b/vnet/vnet/ethernet/packet.h new file mode 100644 index 00000000000..b5d1dcba931 --- /dev/null +++ b/vnet/vnet/ethernet/packet.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet/packet.h: ethernet packet format. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ethernet_packet_h +#define included_ethernet_packet_h + +typedef enum { +#define ethernet_type(n,s) ETHERNET_TYPE_##s = n, +#include <vnet/ethernet/types.def> +#undef ethernet_type +} ethernet_type_t; + +typedef struct { + /* Source/destination address. */ + u8 dst_address[6]; + u8 src_address[6]; + + /* Ethernet type. */ + u16 type; +} ethernet_header_t; + +#define ETHERNET_ADDRESS_UNICAST 0 +#define ETHERNET_ADDRESS_MULTICAST 1 + +/* I/G bit: individual (unicast)/group (broadcast/multicast). 
*/ +always_inline uword +ethernet_address_cast (u8 * a) +{ return (a[0] >> 0) & 1; } + +always_inline uword +ethernet_address_is_locally_administered (u8 * a) +{ return (a[0] >> 1) & 1; } + +always_inline void +ethernet_address_set_locally_administered (u8 * a) +{ a[0] |= 1 << 1; } + +/* For VLAN ethernet type. */ +typedef struct { + /* 3 bit priority, 1 bit CFI and 12 bit vlan id. */ + u16 priority_cfi_and_id; + +#define ETHERNET_N_VLAN (1 << 12) + + /* Inner ethernet type. */ + u16 type; +} ethernet_vlan_header_t; + + +/* VLAN with ethertype first and vlan id second */ +typedef struct { + /* vlan type */ + u16 type; + + /* 3 bit priority, 1 bit CFI and 12 bit vlan id. */ + u16 priority_cfi_and_id; +} ethernet_vlan_header_tv_t; + +#endif /* included_ethernet_packet_h */ diff --git a/vnet/vnet/ethernet/pg.c b/vnet/vnet/ethernet/pg.c new file mode 100644 index 00000000000..838688db451 --- /dev/null +++ b/vnet/vnet/ethernet/pg.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet_pg.c: packet generator ethernet interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
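[Editor's note] packet.h above classifies addresses off the first address byte: bit 0 is the I/G (group) bit tested by ethernet_address_cast, bit 1 the locally-administered bit. A small self-contained equivalent of those helpers:

    #include <stdint.h>
    #include <stdio.h>

    static int is_multicast (const uint8_t *a)            { return a[0] & 1; }
    static int is_locally_administered (const uint8_t *a) { return (a[0] >> 1) & 1; }

    int main (void)
    {
      uint8_t bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
      uint8_t ucast[6] = { 0x00, 0x50, 0x56, 0x01, 0x02, 0x03 };
      printf ("bcast: multicast=%d local=%d\n",
              is_multicast (bcast), is_locally_administered (bcast));  /* 1 1 */
      printf ("ucast: multicast=%d local=%d\n",
              is_multicast (ucast), is_locally_administered (ucast));  /* 0 0 */
      return 0;
    }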
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+
+typedef struct {
+  pg_edit_t type;
+  pg_edit_t src_address;
+  pg_edit_t dst_address;
+} pg_ethernet_header_t;
+
+static inline void
+pg_ethernet_header_init (pg_ethernet_header_t * e)
+{
+  pg_edit_init (&e->type, ethernet_header_t, type);
+  pg_edit_init (&e->src_address, ethernet_header_t, src_address);
+  pg_edit_init (&e->dst_address, ethernet_header_t, dst_address);
+}
+
+typedef struct {
+  pg_edit_t type;
+  pg_edit_t id;
+  pg_edit_t cfi;
+  pg_edit_t priority;
+} pg_ethernet_vlan_header_t;
+
+static inline void
+pg_ethernet_vlan_header_init (pg_ethernet_vlan_header_t * v,
+                              int vlan_index)
+{
+  ASSERT (vlan_index < ARRAY_LEN (((ethernet_max_header_t *) 0)->vlan));
+  pg_edit_init (&v->type, ethernet_max_header_t, vlan[vlan_index].type);
+
+  pg_edit_init_bitfield (&v->id, ethernet_max_header_t,
+                         vlan[vlan_index].priority_cfi_and_id,
+                         0, 12);
+  pg_edit_init_bitfield (&v->cfi, ethernet_max_header_t,
+                         vlan[vlan_index].priority_cfi_and_id,
+                         12, 1);
+  pg_edit_init_bitfield (&v->priority, ethernet_max_header_t,
+                         vlan[vlan_index].priority_cfi_and_id,
+                         13, 3);
+}
+
+uword
+unformat_pg_ethernet_header (unformat_input_t * input, va_list * args)
+{
+  pg_stream_t * s = va_arg (*args, pg_stream_t *);
+  pg_ethernet_header_t * e;
+  pg_ethernet_vlan_header_t * v;
+  pg_edit_t * ether_type_edit;
+  u32 n_vlan, error, group_index;
+
+  e = pg_create_edit_group (s, sizeof (e[0]), sizeof (ethernet_header_t),
+                            &group_index);
+  pg_ethernet_header_init (e);
+  error = 1;
+
+  if (! unformat (input, "%U: %U -> %U",
+                  unformat_pg_edit,
+                  unformat_ethernet_type_net_byte_order, &e->type,
+                  unformat_pg_edit,
+                  unformat_ethernet_address, &e->src_address,
+                  unformat_pg_edit,
+                  unformat_ethernet_address, &e->dst_address))
+    goto done;
+
+  n_vlan = 0;
+  while (unformat (input, "vlan"))
+    {
+      v = pg_add_edits (s, sizeof (v[0]), sizeof (ethernet_vlan_header_t),
+                        group_index);
+      pg_ethernet_vlan_header_init (v, n_vlan);
+
+      if (! unformat_user (input, unformat_pg_edit,
+                           unformat_pg_number, &v->id))
+        goto done;
+
+      if (! unformat (input, "priority %U", unformat_pg_edit,
+                      unformat_pg_number, &v->priority))
+        pg_edit_set_fixed (&v->priority, 0);
+
+      if (! unformat (input, "cfi %U", unformat_pg_edit,
+                      unformat_pg_number, &v->cfi))
+        pg_edit_set_fixed (&v->cfi, 0);
+
+      /* Too many vlans given. */
+      if (n_vlan >= 2)
+        goto done;
+
+      n_vlan++;
+    }
+
+  /* Address of e may have changed due to vlan edits being added */
+  e = pg_get_edit_group (s, group_index);
+  v = (void *) (e + 1);
+
+  /* Correct types for vlan packets. */
+  ether_type_edit = &e->type;
+  if (n_vlan > 0)
+    {
+      int i;
+
+      ether_type_edit = &v[n_vlan - 1].type;
+      pg_edit_copy_type_and_values (ether_type_edit, &e->type);
+      pg_edit_set_fixed (&e->type, ETHERNET_TYPE_VLAN);
+
+      for (i = 0; i < n_vlan - 1; i++)
+        pg_edit_set_fixed (&v[i].type, ETHERNET_TYPE_VLAN);
+    }
+
+  {
+    ethernet_main_t * em = &ethernet_main;
+    ethernet_type_info_t * ti = 0;
+    pg_node_t * pg_node = 0;
+
+    if (ether_type_edit->type == PG_EDIT_FIXED)
+      {
+        u16 t = *(u16 *) ether_type_edit->values[PG_EDIT_LO];
+        ti = ethernet_get_type_info (em, clib_net_to_host_u16 (t));
+        if (ti && ti->node_index != ~0)
+          pg_node = pg_get_node (ti->node_index);
+      }
+
+    if (pg_node && pg_node->unformat_edit
+        && unformat_user (input, pg_node->unformat_edit, s))
+      ;
+    else if (!
unformat_user (input, unformat_pg_payload, s)) + goto done; + } + + error = 0; + + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/ethernet/types.def b/vnet/vnet/ethernet/types.def new file mode 100644 index 00000000000..37c97f62cdb --- /dev/null +++ b/vnet/vnet/ethernet/types.def @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Emacs editing mode -*-C-*- Ethernet types. */ + +/* + * ethernet types + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Types < 0x600 (1536) are LLC packet lengths. 
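+ * Values at or above 0x600 are true protocol identifiers (ethertypes);
+ * the list below starts at that boundary.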
*/ +ethernet_type (0x600, LLC_LENGTH) + +ethernet_type (0x600, XNS_IDP) +ethernet_type (0x800, IP4) +ethernet_type (0x806, ARP) +ethernet_type (0x0BAD, VINES_IP) +ethernet_type (0x0BAE, VINES_LOOPBACK) +ethernet_type (0x0BAF, VINES_ECHO) +ethernet_type (0x1984, TRAIN) +ethernet_type (0x2000, CDP) +ethernet_type (0x2001, CGMP) +ethernet_type (0x2007, SRP_CONTROL) +ethernet_type (0x2452, CENTRINO_PROMISC) +ethernet_type (0x6000, DECNET) +ethernet_type (0x6001, DECNET_DUMP_LOAD) +ethernet_type (0x6002, DECNET_REMOTE_CONSOLE) +ethernet_type (0x6003, DECNET_ROUTE) +ethernet_type (0x6004, DEC_LAT) +ethernet_type (0x6005, DEC_DIAGNOSTIC) +ethernet_type (0x6006, DEC_CUSTOMER) +ethernet_type (0x6007, DEC_SCA) +ethernet_type (0x6558, TRANSPARENT_BRIDGING) +ethernet_type (0x6559, RAW_FRAME_RELAY) +ethernet_type (0x8035, REVERSE_ARP) +ethernet_type (0x8038, DEC_LAN_BRIDGE) +ethernet_type (0x803D, DEC_ETHERNET_ENCRYPTION) +ethernet_type (0x803F, DEC_LAN_TRAFFIC_MONITOR) +ethernet_type (0x8041, DEC_LAST) +ethernet_type (0x809B, APPLETALK) +ethernet_type (0x80D5, IBM_SNA) +ethernet_type (0x80F3, APPLETALK_AARP) +ethernet_type (0x80FF, WELLFLEET_COMPRESSION) +ethernet_type (0x8100, VLAN) +ethernet_type (0x8137, IPX) +ethernet_type (0x814C, SNMP) +ethernet_type (0x81FD, CABLETRON_ISMP) +ethernet_type (0x81FF, CABLETRON_ISMP_TBFLOOD) +ethernet_type (0x86DD, IP6) +ethernet_type (0x86DF, ATOMIC) +ethernet_type (0x876B, TCP_IP_COMPRESSION) +ethernet_type (0x876C, IP_AUTONOMOUS_SYSTEMS) +ethernet_type (0x876D, SECURE_DATA) +ethernet_type (0x8808, MAC_CONTROL) +ethernet_type (0x8809, SLOW_PROTOCOLS) +ethernet_type (0x880B, PPP) +ethernet_type (0x8847, MPLS_UNICAST) +ethernet_type (0x8848, MPLS_MULTICAST) +ethernet_type (0x8863, PPPOE_DISCOVERY) +ethernet_type (0x8864, PPPOE_SESSION) +ethernet_type (0x886D, INTEL_ANS) +ethernet_type (0x886F, MICROSOFT_NLB_HEARTBEAT) +ethernet_type (0x8881, CDMA_2000) +ethernet_type (0x888e, 802_1X_AUTHENTICATION) +ethernet_type (0x8892, PROFINET) +ethernet_type (0x889a, HYPERSCSI) +ethernet_type (0x88a2, AOE) +ethernet_type (0x88a8, DOT1AD) +ethernet_type (0x88AE, BRDWALK) +ethernet_type (0x88B7, 802_OUI_EXTENDED) +ethernet_type (0x88c7, 802_11I_PRE_AUTHENTICATION) +ethernet_type (0x88cc, 802_1_LLDP) +ethernet_type (0x894f, VPATH_3) +ethernet_type (0x9000, LOOPBACK) +ethernet_type (0x9021, RTNET_MAC) +ethernet_type (0x9022, RTNET_CONFIG) +ethernet_type (0x9100, VLAN_9100) +ethernet_type (0x9200, VLAN_9200) +ethernet_type (0x9999, PGLAN) +ethernet_type (0xFEFE, SRP_ISIS) +ethernet_type (0xFFFF, RESERVED) diff --git a/vnet/vnet/flow/flow_report.c b/vnet/vnet/flow/flow_report.c new file mode 100644 index 00000000000..9c0cbb6dd7d --- /dev/null +++ b/vnet/vnet/flow/flow_report.c @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * flow_report.c + */ +#include <vnet/flow/flow_report.h> +#include <vnet/api_errno.h> + +int send_template_packet (flow_report_main_t *frm, + flow_report_t *fr, + u32 * buffer_indexp) +{ + u32 bi0; + vlib_buffer_t * b0; + ip4_ipfix_template_packet_t * tp; + ipfix_message_header_t * h; + ip4_header_t * ip; + udp_header_t * udp; + vlib_main_t * vm = frm->vlib_main; + + ASSERT (buffer_indexp); + + if (fr->update_rewrite || fr->rewrite == 0) + { + if (frm->ipfix_collector.as_u32 == 0 + || frm->src_address.as_u32 == 0) + { + clib_warning ("no collector: disabling flow collector process"); + vlib_node_set_state (frm->vlib_main, flow_report_process_node.index, + VLIB_NODE_STATE_DISABLED); + return -1; + } + vec_free (fr->rewrite); + fr->update_rewrite = 1; + } + + if (fr->update_rewrite) + { + fr->rewrite = fr->rewrite_callback (frm, fr, + &frm->ipfix_collector, + &frm->src_address); + fr->update_rewrite = 0; + } + + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + return -1; + + b0 = vlib_get_buffer (vm, bi0); + + ASSERT (vec_len (fr->rewrite) < VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES); + + memcpy (b0->data, fr->rewrite, vec_len (fr->rewrite)); + b0->current_data = 0; + b0->current_length = vec_len (fr->rewrite); + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + /* $$$ for now, look up in fib-0. Later: arbitrary TX fib */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0; + + tp = vlib_buffer_get_current (b0); + ip = (ip4_header_t *) &tp->ip4; + udp = (udp_header_t *) (ip+1); + h = (ipfix_message_header_t *)(udp+1); + + /* FIXUP: message header export_time */ + h->export_time = (u32) + (((f64)frm->unix_time_0) + + (vlib_time_now(frm->vlib_main) - frm->vlib_time_0)); + h->export_time = clib_host_to_net_u32(h->export_time); + + /* FIXUP: message header sequence_number. Templates do not increase it */ + h->sequence_number = clib_host_to_net_u32(fr->sequence_number); + + /* FIXUP: udp length */ + udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip)); + + *buffer_indexp = bi0; + return 0; +} + +static uword +flow_report_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + flow_report_main_t * frm = &flow_report_main; + flow_report_t * fr; + u32 ip4_lookup_node_index; + vlib_node_t * ip4_lookup_node; + vlib_frame_t * nf = 0; + u32 template_bi; + u32 * to_next; + int send_template; + f64 now; + int rv; + uword event_type; + uword *event_data = 0; + + /* Wait for Godot... */ + vlib_process_wait_for_event_or_clock (vm, 1e9); + event_type = vlib_process_get_events (vm, &event_data); + if (event_type != 1) + clib_warning ("bogus kickoff event received, %d", event_type); + vec_reset_length (event_data); + + /* Enqueue pkts to ip4-lookup */ + ip4_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup"); + ip4_lookup_node_index = ip4_lookup_node->index; + + while (1) + { + vlib_process_suspend (vm, 5.0); + + vec_foreach (fr, frm->reports) + { + now = vlib_time_now (vm); + + /* Need to send a template packet? 
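+ * One is sent if none has ever gone out for this report, or if more
+ * than 20 seconds have elapsed since the last one.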
*/ + send_template = now > (fr->last_template_sent + 20.0); + send_template += fr->last_template_sent == 0; + template_bi = ~0; + rv = 0; + + if (send_template) + rv = send_template_packet (frm, fr, &template_bi); + + if (rv < 0) + continue; + + nf = vlib_get_frame_to_node (vm, ip4_lookup_node_index); + nf->n_vectors = 0; + to_next = vlib_frame_vector_args (nf); + + if (template_bi != ~0) + { + to_next[0] = template_bi; + to_next++; + nf->n_vectors++; + } + + nf = fr->flow_data_callback (frm, fr, + nf, to_next, ip4_lookup_node_index); + if (nf) + vlib_put_frame_to_node (vm, ip4_lookup_node_index, nf); + } + } + + return 0; /* not so much */ +} + +VLIB_REGISTER_NODE (flow_report_process_node) = { + .function = flow_report_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "flow-report-process", +}; + +int vnet_flow_report_add_del (flow_report_main_t *frm, + vnet_flow_report_add_del_args_t *a) +{ + int i; + int found_index = ~0; + flow_report_t *fr; + + for (i = 0; i < vec_len(frm->reports); i++) + { + fr = vec_elt_at_index (frm->reports, i); + if (fr->opaque == a->opaque + && fr->rewrite_callback == a->rewrite_callback + && fr->flow_data_callback == a->flow_data_callback) + { + found_index = i; + break; + } + } + + if (a->is_add == 0) + { + if (found_index != ~0) + { + vec_delete (frm->reports, 1, found_index); + return 0; + } + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + vec_add2 (frm->reports, fr, 1); + + fr->sequence_number = 0; + fr->domain_id = a->domain_id; + fr->update_rewrite = 1; + fr->opaque = a->opaque; + fr->rewrite_callback = a->rewrite_callback; + fr->flow_data_callback = a->flow_data_callback; + + return 0; +} + +static clib_error_t * +set_ipfix_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + flow_report_main_t * frm = &flow_report_main; + ip4_address_t collector, src; + + collector.as_u32 = 0; + src.as_u32 = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "collector %U", unformat_ip4_address, &collector)) + ; + else if (unformat (input, "src %U", unformat_ip4_address, &src)) + ; + else + break; + } + + if (collector.as_u32 == 0) + return clib_error_return (0, "collector address required"); + + if (src.as_u32 == 0) + return clib_error_return (0, "src address required"); + + frm->ipfix_collector.as_u32 = collector.as_u32; + frm->src_address.as_u32 = src.as_u32; + + vlib_cli_output (vm, "Collector %U, src address %U", + format_ip4_address, &frm->ipfix_collector, + format_ip4_address, &frm->src_address); + + /* Turn on the flow reporting process */ + vlib_process_signal_event (vm, flow_report_process_node.index, + 1, 0); + return 0; +} + +VLIB_CLI_COMMAND (set_ipfix_command, static) = { + .path = "set ipfix", + .short_help = "set ipfix collector <ip4-address> src <ip4-address>", + .function = set_ipfix_command_fn, +}; + +static clib_error_t * +flow_report_init (vlib_main_t *vm) +{ + flow_report_main_t * frm = &flow_report_main; + + frm->vlib_main = vm; + frm->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (flow_report_init) diff --git a/vnet/vnet/flow/flow_report.h b/vnet/vnet/flow/flow_report.h new file mode 100644 index 00000000000..14185bf6798 --- /dev/null +++ b/vnet/vnet/flow/flow_report.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_vnet_flow_report_h__ +#define __included_vnet_flow_report_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/udp.h> +#include <vlib/cli.h> +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#include <vnet/flow/ipfix_packet.h> + +/* Used to build the rewrite */ +typedef struct { + ip4_header_t ip4; + udp_header_t udp; + ipfix_template_packet_t ipfix; +} ip4_ipfix_template_packet_t; + +struct flow_report_main; +struct flow_report; + +typedef u8 * (vnet_flow_rewrite_callback_t)(struct flow_report_main *, + struct flow_report *, + ip4_address_t *, + ip4_address_t *); + +typedef vlib_frame_t * (vnet_flow_data_callback_t) (struct flow_report_main *, + struct flow_report *, + vlib_frame_t *, u32 *, + u32); +typedef struct flow_report { + /* ipfix rewrite, set by callback */ + u8 * rewrite; + u32 sequence_number; + u32 domain_id; + f64 last_template_sent; + int update_rewrite; + + /* Bitmap of fields to send */ + uword * fields_to_send; + + /* Opaque data */ + void * opaque; + + /* build-the-rewrite callback */ + vnet_flow_rewrite_callback_t *rewrite_callback; + + /* Send-flow-data callback */ + vnet_flow_data_callback_t *flow_data_callback; +} flow_report_t; + +typedef struct flow_report_main { + flow_report_t * reports; + + /* ipfix collector, our ip address */ + ip4_address_t ipfix_collector; + ip4_address_t src_address; + + /* time scale transform. Joy. */ + u32 unix_time_0; + f64 vlib_time_0; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} flow_report_main_t; + +flow_report_main_t flow_report_main; + +vlib_node_registration_t flow_report_process_node; + +int vnet_flow_report_enable_disable (u32 sw_if_index, u32 table_index, + int enable_disable); +typedef struct { + vnet_flow_data_callback_t *flow_data_callback; + vnet_flow_rewrite_callback_t *rewrite_callback; + void * opaque; + int is_add; + u32 domain_id; +} vnet_flow_report_add_del_args_t; + +int vnet_flow_report_add_del (flow_report_main_t *frm, + vnet_flow_report_add_del_args_t *a); + +#endif /* __included_vnet_flow_report_h__ */ diff --git a/vnet/vnet/flow/flow_report_sample.c b/vnet/vnet/flow/flow_report_sample.c new file mode 100644 index 00000000000..eb2fbfced1d --- /dev/null +++ b/vnet/vnet/flow/flow_report_sample.c @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/flow/flow_report.h> +#include <vnet/flow/flow_report_sample.h> +#include <vnet/api_errno.h> + +typedef struct { + u32 classify_table_index; +} flow_report_sample_main_t; + +flow_report_sample_main_t flow_report_sample_main; + +static u8 * template_rewrite (flow_report_main_t * frm, + flow_report_t * fr, + ip4_address_t * collector_address, + ip4_address_t * src_address) +{ + vnet_classify_table_t * tblp; + vnet_classify_main_t * vcm = &vnet_classify_main; + flow_report_sample_main_t *fsm = + (flow_report_sample_main_t *) fr->opaque; + ip4_header_t * ip; + udp_header_t * udp; + ipfix_message_header_t * h; + ipfix_set_header_t * s; + ipfix_template_header_t * t; + ipfix_field_specifier_t * f; + ipfix_field_specifier_t * first_field; + u8 * rewrite = 0; + ip4_ipfix_template_packet_t * tp; + i32 l3_offset = -2; /* sizeof (ethernet_header_t) - sizeof (u32x4) */ + u32 field_count = 0; + u32 field_index = 0; + + tblp = pool_elt_at_index (vcm->tables, fsm->classify_table_index); + + /* + * Mumble, assumes that we're not classifying on L2 or first 2 octets + * of L3.. + */ + + ip = (ip4_header_t *)(((u8 *)(tblp->mask)) + l3_offset); + udp = (udp_header_t *)(ip+1); + + /* Determine field count */ +#define _(field,mask,item,length) \ + if ((field) == (mask)) \ + { \ + field_count++; \ + \ + fr->fields_to_send = clib_bitmap_set (fr->fields_to_send, \ + field_index, 1); \ + } \ + field_index++; + + foreach_ipfix_field; +#undef _ + /* Add packetTotalCount manually */ + field_count += 1; + + /* $$$ enterprise fields, at some later date */ + + /* allocate rewrite space */ + vec_validate_aligned (rewrite, + sizeof (ip4_ipfix_template_packet_t) + + field_count * sizeof (ipfix_field_specifier_t) - 1, + CLIB_CACHE_LINE_BYTES); + + tp = (ip4_ipfix_template_packet_t *) rewrite; + ip = (ip4_header_t *) &tp->ip4; + udp = (udp_header_t *) (ip+1); + h = (ipfix_message_header_t *)(udp+1); + s = (ipfix_set_header_t *)(h+1); + t = (ipfix_template_header_t *)(s+1); + first_field = f = (ipfix_field_specifier_t *)(t+1); + + ip->ip_version_and_header_length = 0x45; + ip->ttl = 254; + ip->protocol = IP_PROTOCOL_UDP; + ip->src_address.as_u32 = src_address->as_u32; + ip->dst_address.as_u32 = collector_address->as_u32; + udp->src_port = clib_host_to_net_u16 (4739 /* $$FIXME */); + udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix); + udp->length = clib_host_to_net_u16 (vec_len(rewrite) - sizeof (*ip)); + + /* FIXUP: message header export_time */ + /* FIXUP: message header sequence_number */ + h->domain_id = clib_host_to_net_u32 (fr->domain_id); + + /* Take another trip through the mask and build the template */ + ip = (ip4_header_t *)(((u8 *)(tblp->mask)) + l3_offset); + udp = (udp_header_t *)(ip+1); +#define _(field,mask,item,length) \ + if ((field) == (mask)) \ + { \ + f->e_id_length = ipfix_e_id_length (0 /* enterprise */, \ + item, length); \ + f++; \ + } + foreach_ipfix_field; +#undef _ + + /* Add packetTotalCount manually */ + f->e_id_length = ipfix_e_id_length (0 /* enterprise */, packetTotalCount, 8); + f++; + + /* Back to the template packet... 
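+ * With every field specifier written, the template, set and message
+ * headers, the IP total length and the IPv4 header checksum are filled
+ * in back to front.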
*/ + ip = (ip4_header_t *) &tp->ip4; + udp = (udp_header_t *) (ip+1); + + ASSERT (f - first_field); + /* Field count in this template */ + t->id_count = ipfix_id_count (256 /* template_id */, f - first_field); + + /* set length in octets*/ + s->set_id_length = ipfix_set_id_length (2 /* set_id */, (u8 *) f - (u8 *)s); + + /* message length in octets */ + h->version_length = version_length ((u8 *)f - (u8 *)h); + + ip->length = clib_host_to_net_u16 ((u8 *)f - (u8 *)ip); + ip->checksum = ip4_header_checksum (ip); + + return rewrite; +} + +static vlib_frame_t * send_flows (flow_report_main_t * frm, + flow_report_t * fr, + vlib_frame_t * f, u32 * to_next, + u32 node_index) +{ + vnet_classify_main_t * vcm = &vnet_classify_main; + flow_report_sample_main_t * fsm = + (flow_report_sample_main_t *) fr->opaque; + vnet_classify_table_t * t = + pool_elt_at_index (vcm->tables, fsm->classify_table_index); + vnet_classify_bucket_t * b; + vnet_classify_entry_t * v, * save_v; + vlib_buffer_t *b0 = 0; + u32 next_offset = 0; + u32 bi0 = ~0; + int i, j, k; + ip4_ipfix_template_packet_t * tp; + ipfix_message_header_t * h; + ipfix_set_header_t * s = 0; + ip4_header_t * ip; + udp_header_t * udp; + int field_index; + ip4_header_t * match; + u32 records_this_buffer; + u16 new_l0, old_l0; + ip_csum_t sum0; + vlib_main_t * vm = frm->vlib_main; + + while (__sync_lock_test_and_set (t->writer_lock, 1)) + ; + + for (i = 0; i < t->nbuckets; i++) + { + b = &t->buckets [i]; + if (b->offset == 0) + continue; + + save_v = vnet_classify_get_entry (t, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < t->entries_per_page; k++) + { + v = vnet_classify_entry_at_index + (t, save_v, j*t->entries_per_page + k); + + if (vnet_classify_entry_is_free (v)) + continue; + + /* OK, we have something to send... */ + if (PREDICT_FALSE (b0 == 0)) + { + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + goto flush; + b0 = vlib_get_buffer (vm, bi0); + + memcpy (b0->data, fr->rewrite, vec_len (fr->rewrite)); + b0->current_data = 0; + b0->current_length = vec_len (fr->rewrite); + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + /* $$$ for now, look up in fib-0. 
Later: arbitrary TX fib */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0; + + tp = vlib_buffer_get_current (b0); + ip = (ip4_header_t *) &tp->ip4; + udp = (udp_header_t *) (ip+1); + h = (ipfix_message_header_t *)(udp+1); + s = (ipfix_set_header_t *)(h+1); + + /* FIXUP: message header export_time */ + h->export_time = (u32) + (((f64)frm->unix_time_0) + + (vlib_time_now(frm->vlib_main) - frm->vlib_time_0)); + h->export_time = clib_host_to_net_u32(h->export_time); + + /* FIXUP: message header sequence_number */ + h->sequence_number = fr->sequence_number++; + h->sequence_number = clib_host_to_net_u32 (h->sequence_number); + next_offset = (u32) (((u8 *)(s+1)) - (u8 *)tp); + records_this_buffer = 0; + } + + field_index = 0; + match = (ip4_header_t *) (((u8 *)v->key) - 2); + ip = match; + udp = (udp_header_t * )(ip+1); + +#define _(field,mask,item,length) \ + if (clib_bitmap_get (fr->fields_to_send, field_index)) \ + { \ + memcpy (b0->data + next_offset, &field, \ + length); \ + next_offset += length; \ + } \ + field_index++; + foreach_ipfix_field; +#undef _ + + /* Add packetTotalCount manually */ + { + u64 packets = clib_host_to_net_u64 (v->hits); + memcpy (b0->data + next_offset, &packets, sizeof (packets)); + next_offset += sizeof (packets); + } + records_this_buffer++; + + if (next_offset > 1450) + { + s->set_id_length = ipfix_set_id_length (256 /* template ID*/, + next_offset - + (sizeof (*ip) + sizeof (*udp) + + sizeof (*h))); + b0->current_length = next_offset; + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + tp = vlib_buffer_get_current (b0); + ip = (ip4_header_t *) &tp->ip4; + udp = (udp_header_t *) (ip+1); + + sum0 = ip->checksum; + old_l0 = clib_net_to_host_u16 (ip->length); + new_l0 = + clib_host_to_net_u16 ((u16)next_offset); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + + ip->checksum = ip_csum_fold (sum0); + ip->length = new_l0; + udp->length = + clib_host_to_net_u16 (b0->current_length - sizeof (ip)); + + to_next[0] = bi0; + f->n_vectors++; + to_next++; + + if (f->n_vectors == VLIB_FRAME_SIZE) + { + vlib_put_frame_to_node (vm, node_index, f); + f = vlib_get_frame_to_node (vm, node_index); + f->n_vectors = 0; + to_next = vlib_frame_vector_args (f); + } + b0 = 0; + bi0 = ~0; + } + } + } + } + + flush: + if (b0) + { + s->set_id_length = ipfix_set_id_length (256 /* template ID*/, + next_offset - + (sizeof (*ip) + sizeof (*udp) + + sizeof (*h))); + b0->current_length = next_offset; + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + tp = vlib_buffer_get_current (b0); + ip = (ip4_header_t *) &tp->ip4; + udp = (udp_header_t *) (ip+1); + + sum0 = ip->checksum; + old_l0 = ip->length; + new_l0 = clib_host_to_net_u16 ((u16)next_offset); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + + ip->checksum = ip_csum_fold (sum0); + ip->length = new_l0; + udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip)); + + ASSERT (ip->checksum == ip4_header_checksum (ip)); + + to_next[0] = bi0; + f->n_vectors++; + + b0 = 0; + bi0 = ~0; + } + + *(t->writer_lock) = 0; + return f; +} + + +static clib_error_t * +flow_sample_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + flow_report_sample_main_t *fsm = &flow_report_sample_main; + flow_report_main_t *frm = &flow_report_main; + vnet_flow_report_add_del_args_t args; + int rv; + int is_add = 1; + u32 domain_id = 0; + + domain_id = 0; + fsm->classify_table_index = ~0; + memset (&args, 0, sizeof (args)); + + 
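+  /*
+   * Accepted syntax, inferred from the unformat calls below:
+   *   flow sample table <classify-table-index> [domain <id>]
+   *   flow sample table <classify-table-index> del
+   */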
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "table %d", &fsm->classify_table_index)) + ; + else if (unformat (input, "domain %d", &domain_id)) + ; + else if (unformat (input, "del")) + is_add = 0; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (fsm->classify_table_index == ~0) + return clib_error_return (0, "classifier table not specified"); + + args.opaque = (void *) fsm; + args.rewrite_callback = template_rewrite; + args.flow_data_callback = send_flows; + args.is_add = is_add; + args.domain_id = domain_id; + + rv = vnet_flow_report_add_del (frm, &args); + + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "registration not found..."); + default: + return clib_error_return (0, "vnet_flow_report_add_del returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (flow_sample_command, static) = { + .path = "flow sample", + .short_help = "flow sample", + .function = flow_sample_command_fn, +}; + +static clib_error_t * +flow_report_sample_init (vlib_main_t *vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, flow_report_init))) + return error; + + return 0; +} + +VLIB_INIT_FUNCTION (flow_report_sample_init); diff --git a/vnet/vnet/flow/flow_report_sample.h b/vnet/vnet/flow/flow_report_sample.h new file mode 100644 index 00000000000..945beba1897 --- /dev/null +++ b/vnet/vnet/flow/flow_report_sample.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_flow_report_sample_h__ +#define __included_flow_report_sample_h__ + +/* Note: add +2 to udp (src,dst) port enum values to get TCP values */ +#define foreach_ipfix_field \ +_(ip->src_address.as_u32, 0xffffffff, sourceIPv4Address, 4) \ +_(ip->dst_address.as_u32, 0xffffffff, destinationIPv4Address, 4) \ +_(ip->protocol, 0xFF, protocolIdentifier, 1) \ +_(udp->src_port, 0xFFFF, udpSourcePort, 2) \ +_(udp->dst_port, 0xFFFF, udpDestinationPort, 2) + +#endif /* __included_flow_report_sample_h__ */ diff --git a/vnet/vnet/flow/ipfix_info_elements.h b/vnet/vnet/flow/ipfix_info_elements.h new file mode 100644 index 00000000000..5d7e935dabb --- /dev/null +++ b/vnet/vnet/flow/ipfix_info_elements.h @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_ipfix_info_elements_h__ +#define __included_ipfix_info_elements_h__ + +#define foreach_ipfix_info_element_t \ +_(octetDeltaCount, 1, u64) \ +_(packetDeltaCount, 2, u64) \ +_(deltaFlowCount, 3, u64) \ +_(protocolIdentifier, 4, u8) \ +_(ipClassOfService, 5, u8) \ +_(tcpControlBits, 6, u16) \ +_(sourceTransportPort, 7, u16) \ +_(sourceIPv4Address, 8, ip4_address_t) \ +_(sourceIPv4PrefixLength, 9, u8) \ +_(ingressInterface, 10, u32) \ +_(destinationTransportPort, 11, u16) \ +_(destinationIPv4Address, 12, ip4_address_t) \ +_(destinationIPv4PrefixLength, 13, u8) \ +_(egressInterface, 14, u32) \ +_(ipNextHopIPv4Address, 15, ip4_address_t) \ +_(bgpSourceAsNumber, 16, u32) \ +_(bgpDestinationAsNumber, 17, u32) \ +_(bgpNextHopIPv4Address, 18, ip4_address_t) \ +_(postMCastPacketDeltaCount, 19, u64) \ +_(postMCastOctetDeltaCount, 20, u64) \ +_(flowEndSysUpTime, 21, u32) \ +_(flowStartSysUpTime, 22, u32) \ +_(postOctetDeltaCount, 23, u64) \ +_(postPacketDeltaCount, 24, u64) \ +_(minimumIpTotalLength, 25, u64) \ +_(maximumIpTotalLength, 26, u64) \ +_(sourceIPv6Address, 27, ip6_address_t) \ +_(destinationIPv6Address, 28, ip6_address_t) \ +_(sourceIPv6PrefixLength, 29, u8) \ +_(destinationIPv6PrefixLength, 30, u8) \ +_(flowLabelIPv6, 31, u32) \ +_(icmpTypeCodeIPv4, 32, u16) \ +_(igmpType, 33, u8) \ +_(samplingInterval, 34, u32) \ +_(samplingAlgorithm, 35, u8) \ +_(flowActiveTimeout, 36, u16) \ +_(flowIdleTimeout, 37, u16) \ +_(engineType, 38, u8) \ +_(engineId, 39, u8) \ +_(exportedOctetTotalCount, 40, u64) \ +_(exportedMessageTotalCount, 41, u64) \ +_(exportedFlowRecordTotalCount, 42, u64) \ +_(ipv4RouterSc, 43, ip4_address_t) \ +_(sourceIPv4Prefix, 44, ip4_address_t) \ +_(destinationIPv4Prefix, 45, ip4_address_t) \ +_(mplsTopLabelType, 46, u8) \ +_(mplsTopLabelIPv4Address, 47, ip4_address_t) \ +_(samplerId, 48, u8) \ +_(samplerMode, 49, u8) \ +_(samplerRandomInterval, 50, u32) \ +_(classId, 51, u8) \ +_(minimumTTL, 52, u8) \ +_(maximumTTL, 53, u8) \ +_(fragmentIdentification, 54, u32) \ +_(postIpClassOfService, 55, u8) \ +_(sourceMacAddress, 56, macAddress) \ +_(postDestinationMacAddress, 57, macAddress) \ +_(vlanId, 58, u16) \ +_(postVlanId, 59, u16) \ +_(ipVersion, 60, u8) \ +_(flowDirection, 61, u8) \ +_(ipNextHopIPv6Address, 62, ip6_address_t) \ +_(bgpNextHopIPv6Address, 63, ip6_address_t) \ +_(ipv6ExtensionHeaders, 64, u32) \ +_(mplsTopLabelStackSection, 70, octetArray) \ +_(mplsLabelStackSection2, 71, octetArray) \ +_(mplsLabelStackSection3, 72, octetArray) \ +_(mplsLabelStackSection4, 73, octetArray) \ +_(mplsLabelStackSection5, 74, octetArray) \ +_(mplsLabelStackSection6, 75, octetArray) \ +_(mplsLabelStackSection7, 76, octetArray) \ +_(mplsLabelStackSection8, 77, octetArray) \ +_(mplsLabelStackSection9, 78, octetArray) \ +_(mplsLabelStackSection10, 79, octetArray) \ +_(destinationMacAddress, 80, macAddress) \ +_(postSourceMacAddress, 81, macAddress) \ +_(interfaceName, 82, string) \ +_(interfaceDescription, 83, string) \ +_(samplerName, 84, string) \ +_(octetTotalCount, 85, u64) \ +_(packetTotalCount, 86, u64) \ +_(flagsAndSamplerId, 87, u32) \ +_(fragmentOffset, 88, u16) \ +_(forwardingStatus, 89, u32) \ +_(mplsVpnRouteDistinguisher, 90, octetArray) \ +_(mplsTopLabelPrefixLength, 91, u8) \ +_(srcTrafficIndex, 92, u32) \ +_(dstTrafficIndex, 93, u32) \ +_(applicationDescription, 94, string) \ +_(applicationId, 95, octetArray) \ +_(applicationName, 96, string) \ +_(Assigned, 97, for NetFlow v9 compatibility ) \ +_(postIpDiffServCodePoint, 98, u8) \ 
+_(multicastReplicationFactor, 99, u32) \ +_(className, 100, string) \ +_(classificationEngineId, 101, u8) \ +_(layer2packetSectionOffset, 102, u16) \ +_(layer2packetSectionSize, 103, u16) \ +_(layer2packetSectionData, 104, octetArray) \ +_(bgpNextAdjacentAsNumber, 128, u32) \ +_(bgpPrevAdjacentAsNumber, 129, u32) \ +_(exporterIPv4Address, 130, ip4_address_t) \ +_(exporterIPv6Address, 131, ip6_address_t) \ +_(droppedOctetDeltaCount, 132, u64) \ +_(droppedPacketDeltaCount, 133, u64) \ +_(droppedOctetTotalCount, 134, u64) \ +_(droppedPacketTotalCount, 135, u64) \ +_(flowEndReason, 136, u8) \ +_(commonPropertiesId, 137, u64) \ +_(observationPointId, 138, u64) \ +_(icmpTypeCodeIPv6, 139, u16) \ +_(mplsTopLabelIPv6Address, 140, ip6_address_t) \ +_(lineCardId, 141, u32) \ +_(portId, 142, u32) \ +_(meteringProcessId, 143, u32) \ +_(exportingProcessId, 144, u32) \ +_(templateId, 145, u16) \ +_(wlanChannelId, 146, u8) \ +_(wlanSSID, 147, string) \ +_(flowId, 148, u64) \ +_(observationDomainId, 149, u32) \ +_(flowStartSeconds, 150, dateTimeSeconds) \ +_(flowEndSeconds, 151, dateTimeSeconds) \ +_(flowStartMilliseconds, 152, dateTimeMilliseconds) \ +_(flowEndMilliseconds, 153, dateTimeMilliseconds) \ +_(flowStartMicroseconds, 154, dateTimeMicroseconds) \ +_(flowEndMicroseconds, 155, dateTimeMicroseconds) \ +_(flowStartNanoseconds, 156, dateTimeNanoseconds) \ +_(flowEndNanoseconds, 157, dateTimeNanoseconds) \ +_(flowStartDeltaMicroseconds, 158, u32) \ +_(flowEndDeltaMicroseconds, 159, u32) \ +_(systemInitTimeMilliseconds, 160, dateTimeMilliseconds) \ +_(flowDurationMilliseconds, 161, u32) \ +_(flowDurationMicroseconds, 162, u32) \ +_(observedFlowTotalCount, 163, u64) \ +_(ignoredPacketTotalCount, 164, u64) \ +_(ignoredOctetTotalCount, 165, u64) \ +_(notSentFlowTotalCount, 166, u64) \ +_(notSentPacketTotalCount, 167, u64) \ +_(notSentOctetTotalCount, 168, u64) \ +_(destinationIPv6Prefix, 169, ip6_address_t) \ +_(sourceIPv6Prefix, 170, ip6_address_t) \ +_(postOctetTotalCount, 171, u64) \ +_(postPacketTotalCount, 172, u64) \ +_(flowKeyIndicator, 173, u64) \ +_(postMCastPacketTotalCount, 174, u64) \ +_(postMCastOctetTotalCount, 175, u64) \ +_(icmpTypeIPv4, 176, u8) \ +_(icmpCodeIPv4, 177, u8) \ +_(icmpTypeIPv6, 178, u8) \ +_(icmpCodeIPv6, 179, u8) \ +_(udpSourcePort, 180, u16) \ +_(udpDestinationPort, 181, u16) \ +_(tcpSourcePort, 182, u16) \ +_(tcpDestinationPort, 183, u16) \ +_(tcpSequenceNumber, 184, u32) \ +_(tcpAcknowledgementNumber, 185, u32) \ +_(tcpWindowSize, 186, u16) \ +_(tcpUrgentPointer, 187, u16) \ +_(tcpHeaderLength, 188, u8) \ +_(ipHeaderLength, 189, u8) \ +_(totalLengthIPv4, 190, u16) \ +_(payloadLengthIPv6, 191, u16) \ +_(ipTTL, 192, u8) \ +_(nextHeaderIPv6, 193, u8) \ +_(mplsPayloadLength, 194, u32) \ +_(ipDiffServCodePoint, 195, u8) \ +_(ipPrecedence, 196, u8) \ +_(fragmentFlags, 197, u8) \ +_(octetDeltaSumOfSquares, 198, u64) \ +_(octetTotalSumOfSquares, 199, u64) \ +_(mplsTopLabelTTL, 200, u8) \ +_(mplsLabelStackLength, 201, u32) \ +_(mplsLabelStackDepth, 202, u32) \ +_(mplsTopLabelExp, 203, u8) \ +_(ipPayloadLength, 204, u32) \ +_(udpMessageLength, 205, u16) \ +_(isMulticast, 206, u8) \ +_(ipv4IHL, 207, u8) \ +_(ipv4Options, 208, u32) \ +_(tcpOptions, 209, u64) \ +_(paddingOctets, 210, octetArray) \ +_(collectorIPv4Address, 211, ip4_address_t) \ +_(collectorIPv6Address, 212, ip6_address_t) \ +_(exportInterface, 213, u32) \ +_(exportProtocolVersion, 214, u8) \ +_(exportTransportProtocol, 215, u8) \ +_(collectorTransportPort, 216, u16) \ +_(exporterTransportPort, 217, u16) \ 
+_(tcpSynTotalCount, 218, u64) \ +_(tcpFinTotalCount, 219, u64) \ +_(tcpRstTotalCount, 220, u64) \ +_(tcpPshTotalCount, 221, u64) \ +_(tcpAckTotalCount, 222, u64) \ +_(tcpUrgTotalCount, 223, u64) \ +_(ipTotalLength, 224, u64) \ +_(postNATSourceIPv4Address, 225, ip4_address_t) \ +_(postNATDestinationIPv4Address, 226, ip4_address_t) \ +_(postNAPTSourceTransportPort, 227, u16) \ +_(postNAPTDestinationTransportPort, 228, u16) \ +_(natOriginatingAddressRealm, 229, u8) \ +_(natEvent, 230, u8) \ +_(initiatorOctets, 231, u64) \ +_(responderOctets, 232, u64) \ +_(firewallEvent, 233, u8) \ +_(ingressVRFID, 234, u32) \ +_(egressVRFID, 235, u32) \ +_(VRFname, 236, string) \ +_(postMplsTopLabelExp, 237, u8) \ +_(tcpWindowScale, 238, u16) \ +_(biflowDirection, 239, u8) \ +_(ethernetHeaderLength, 240, u8) \ +_(ethernetPayloadLength, 241, u16) \ +_(ethernetTotalLength, 242, u16) \ +_(dot1qVlanId, 243, u16) \ +_(dot1qPriority, 244, u8) \ +_(dot1qCustomerVlanId, 245, u16) \ +_(dot1qCustomerPriority, 246, u8) \ +_(metroEvcId, 247, string) \ +_(metroEvcType, 248, u8) \ +_(pseudoWireId, 249, u32) \ +_(pseudoWireType, 250, u16) \ +_(pseudoWireControlWord, 251, u32) \ +_(ingressPhysicalInterface, 252, u32) \ +_(egressPhysicalInterface, 253, u32) \ +_(postDot1qVlanId, 254, u16) \ +_(postDot1qCustomerVlanId, 255, u16) \ +_(ethernetType, 256, u16) \ +_(postIpPrecedence, 257, u8) \ +_(collectionTimeMilliseconds, 258, dateTimeMilliseconds) \ +_(exportSctpStreamId, 259, u16) \ +_(maxExportSeconds, 260, dateTimeSeconds) \ +_(maxFlowEndSeconds, 261, dateTimeSeconds) \ +_(messageMD5Checksum, 262, octetArray) \ +_(messageScope, 263, u8) \ +_(minExportSeconds, 264, dateTimeSeconds) \ +_(minFlowStartSeconds, 265, dateTimeSeconds) \ +_(opaqueOctets, 266, octetArray) \ +_(sessionScope, 267, u8) \ +_(maxFlowEndMicroseconds, 268, dateTimeMicroseconds) \ +_(maxFlowEndMilliseconds, 269, dateTimeMilliseconds) \ +_(maxFlowEndNanoseconds, 270, dateTimeNanoseconds) \ +_(minFlowStartMicroseconds, 271, dateTimeMicroseconds) \ +_(minFlowStartMilliseconds, 272, dateTimeMilliseconds) \ +_(minFlowStartNanoseconds, 273, dateTimeNanoseconds) \ +_(collectorCertificate, 274, octetArray) \ +_(exporterCertificate, 275, octetArray) \ +_(dataRecordsReliability, 276, boolean) \ +_(observationPointType, 277, u8) \ +_(newConnectionDeltaCount, 278, u32) \ +_(connectionSumDurationSeconds, 279, u64) \ +_(connectionTransactionId, 280, u64) \ +_(postNATSourceIPv6Address, 281, ip6_address_t) \ +_(postNATDestinationIPv6Address, 282, ip6_address_t) \ +_(natPoolId, 283, u32) \ +_(natPoolName, 284, string) \ +_(anonymizationFlags, 285, u16) \ +_(anonymizationTechnique, 286, u16) \ +_(informationElementIndex, 287, u16) \ +_(p2pTechnology, 288, string) \ +_(tunnelTechnology, 289, string) \ +_(encryptedTechnology, 290, string) \ +_(basicList, 291, basicList) \ +_(subTemplateList, 292, subTemplateList) \ +_(subTemplateMultiList, 293, subTemplateMultiList) \ +_(bgpValidityState, 294, u8) \ +_(IPSecSPI, 295, u32) \ +_(greKey, 296, u32) \ +_(natType, 297, u8) \ +_(initiatorPackets, 298, u64) \ +_(responderPackets, 299, u64) \ +_(observationDomainName, 300, string) \ +_(selectionSequenceId, 301, u64) \ +_(selectorId, 302, u64) \ +_(informationElementId, 303, u16) \ +_(selectorAlgorithm, 304, u16) \ +_(samplingPacketInterval, 305, u32) \ +_(samplingPacketSpace, 306, u32) \ +_(samplingTimeInterval, 307, u32) \ +_(samplingTimeSpace, 308, u32) \ +_(samplingSize, 309, u32) \ +_(samplingPopulation, 310, u32) \ +_(samplingProbability, 311, float64) \ +_(dataLinkFrameSize, 
312, u16) \ +_(ipHeaderPacketSection, 313, octetArray) \ +_(ipPayloadPacketSection, 314, octetArray) \ +_(dataLinkFrameSection, 315, octetArray) \ +_(mplsLabelStackSection, 316, octetArray) \ +_(mplsPayloadPacketSection, 317, octetArray) \ +_(selectorIdTotalPktsObserved, 318, u64) \ +_(selectorIdTotalPktsSelected, 319, u64) \ +_(absoluteError, 320, float64) \ +_(relativeError, 321, float64) \ +_(observationTimeSeconds, 322, dateTimeSeconds) \ +_(observationTimeMilliseconds, 323, dateTimeMilliseconds) \ +_(observationTimeMicroseconds, 324, dateTimeMicroseconds) \ +_(observationTimeNanoseconds, 325, dateTimeNanoseconds) \ +_(digestHashValue, 326, u64) \ +_(hashIPPayloadOffset, 327, u64) \ +_(hashIPPayloadSize, 328, u64) \ +_(hashOutputRangeMin, 329, u64) \ +_(hashOutputRangeMax, 330, u64) \ +_(hashSelectedRangeMin, 331, u64) \ +_(hashSelectedRangeMax, 332, u64) \ +_(hashDigestOutput, 333, boolean) \ +_(hashInitialiserValue, 334, u64) \ +_(selectorName, 335, string) \ +_(upperCILimit, 336, float64) \ +_(lowerCILimit, 337, float64) \ +_(confidenceLevel, 338, float64) \ +_(informationElementDataType, 339, u8) \ +_(informationElementDescription, 340, string) \ +_(informationElementName, 341, string) \ +_(informationElementRangeBegin, 342, u64) \ +_(informationElementRangeEnd, 343, u64) \ +_(informationElementSemantics, 344, u8) \ +_(informationElementUnits, 345, u16) \ +_(privateEnterpriseNumber, 346, u32) \ +_(virtualStationInterfaceId, 347, octetArray) \ +_(virtualStationInterfaceName, 348, string) \ +_(virtualStationUUID, 349, octetArray) \ +_(virtualStationName, 350, string) \ +_(layer2SegmentId, 351, u64) \ +_(layer2OctetDeltaCount, 352, u64) \ +_(layer2OctetTotalCount, 353, u64) \ +_(ingressUnicastPacketTotalCount, 354, u64) \ +_(ingressMulticastPacketTotalCount, 355, u64) \ +_(ingressBroadcastPacketTotalCount, 356, u64) \ +_(egressUnicastPacketTotalCount, 357, u64) \ +_(egressBroadcastPacketTotalCount, 358, u64) \ +_(monitoringIntervalStartMilliSeconds, 359, dateTimeMilliseconds) \ +_(monitoringIntervalEndMilliSeconds, 360, dateTimeMilliseconds) \ +_(portRangeStart, 361, u16) \ +_(portRangeEnd, 362, u16) \ +_(portRangeStepSize, 363, u16) \ +_(portRangeNumPorts, 364, u16) \ +_(staMacAddress, 365, macAddress) \ +_(staIPv4Address, 366, ip4_address_t) \ +_(wtpMacAddress, 367, macAddress ) \ +_(ingressInterfaceType, 368, u32) \ +_(egressInterfaceType, 369, u32) \ +_(rtpSequenceNumber, 370, u16) \ +_(userName, 371, string) \ +_(applicationCategoryName, 372, string) \ +_(applicationSubCategoryName, 373, string) \ +_(applicationGroupName, 374, string) \ +_(originalFlowsPresent, 375, u64) \ +_(originalFlowsInitiated, 376, u64) \ +_(originalFlowsCompleted, 377, u64) \ +_(distinctCountOfSourceIPAddress, 378, u64) \ +_(distinctCountOfDestinationIPAddress, 379, u64) \ +_(distinctCountOfSourceIPv4Address, 380, u32) \ +_(distinctCountOfDestinationIPv4Address, 381, u32) \ +_(distinctCountOfSourceIPv6Address, 382, u64) \ +_(distinctCountOfDestinationIPv6Address, 383, u64) \ +_(valueDistributionMethod, 384, u8) \ +_(rfc3550JitterMilliseconds, 385, u32) \ +_(rfc3550JitterMicroseconds, 386, u32) \ +_(rfc3550JitterNanoseconds, 387, u32) \ +_(dot1qDEI, 388, boolean) \ +_(dot1qCustomerDEI, 389, boolean) \ +_(flowSelectorAlgorithm, 390, u16) \ +_(flowSelectedOctetDeltaCount, 391, u64) \ +_(flowSelectedPacketDeltaCount, 392, u64) \ +_(flowSelectedFlowDeltaCount, 393, u64) \ +_(selectorIDTotalFlowsObserved, 394, u64) \ +_(selectorIDTotalFlowsSelected, 395, u64) \ +_(samplingFlowInterval, 396, u64) \ 
+_(samplingFlowSpacing, 397, u64) \ +_(flowSamplingTimeInterval, 398, u64) \ +_(flowSamplingTimeSpacing, 399, u64) \ +_(hashFlowDomain, 400, u16) \ +_(transportOctetDeltaCount, 401, u64) \ +_(transportPacketDeltaCount, 402, u64) \ +_(originalExporterIPv4Address, 403, ip4_address_t) \ +_(originalExporterIPv6Address, 404, ip6_address_t) \ +_(originalObservationDomainId, 405, u32) \ +_(intermediateProcessId, 406, u32) \ +_(ignoredDataRecordTotalCount, 407, u64) \ +_(dataLinkFrameType, 408, u16) \ +_(sectionOffset, 409, u16) \ +_(sectionExportedOctets, 410, u16) \ +_(dot1qServiceInstanceTag, 411, octetArray) \ +_(dot1qServiceInstanceId, 412, u32) \ +_(dot1qServiceInstancePriority, 413, u8) \ +_(dot1qCustomerSourceMacAddress, 414, macAddress) \ +_(dot1qCustomerDestinationMacAddress, 415, macAddress) \ +_(postLayer2OctetDeltaCount, 417, u64) \ +_(postMCastLayer2OctetDeltaCount, 418, u64) \ +_(postLayer2OctetTotalCount, 420, u64) \ +_(postMCastLayer2OctetTotalCount, 421, u64) \ +_(minimumLayer2TotalLength, 422, u64) \ +_(maximumLayer2TotalLength, 423, u64) \ +_(droppedLayer2OctetDeltaCount, 424, u64) \ +_(droppedLayer2OctetTotalCount, 425, u64) \ +_(ignoredLayer2OctetTotalCount, 426, u64) \ +_(notSentLayer2OctetTotalCount, 427, u64) \ +_(layer2OctetDeltaSumOfSquares, 428, u64) \ +_(layer2OctetTotalSumOfSquares, 429, u64) \ +_(layer2FrameDeltaCount, 430, u64) \ +_(layer2FrameTotalCount, 431, u64) \ +_(pseudoWireDestinationIPv4Address, 432, ip4_address_t) \ +_(ignoredLayer2FrameTotalCount, 433, u64) + +typedef enum { +#define _(n,v,t) n = v, + foreach_ipfix_info_element_t +#undef _ +} ipfix_info_element_id_t; + +#endif /* __included_ipfix_info_elements_h__ */ diff --git a/vnet/vnet/flow/ipfix_packet.h b/vnet/vnet/flow/ipfix_packet.h new file mode 100644 index 00000000000..329796191de --- /dev/null +++ b/vnet/vnet/flow/ipfix_packet.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ipfix_packet_h__ +#define __included_ipfix_packet_h__ + +#include <vnet/flow/ipfix_info_elements.h> + +/* From RFC-7011: + * https://tools.ietf.org/html/rfc7011 + */ + +typedef struct { + u32 version_length; + u32 export_time; + u32 sequence_number; + u32 domain_id; +} ipfix_message_header_t; + +static inline u32 version_length (u16 length) +{ + return clib_host_to_net_u32 (0x000a0000 | length); +} + + +/* + * The Field Specifier format is shown in Figure G. + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |E| Information Element ident. | Field Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Enterprise Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Figure G: Field Specifier Format + * + * Where: + * + * E + * + * Enterprise bit. This is the first bit of the Field Specifier. 
If + * this bit is zero, the Information Element identifier identifies an + * Information Element in [IANA-IPFIX], and the four-octet Enterprise + * Number field MUST NOT be present. If this bit is one, the + * Information Element identifier identifies an enterprise-specific + * Information Element, and the Enterprise Number field MUST be + * present. + */ + +typedef struct { + u32 e_id_length; + u32 enterprise; +} ipfix_enterprise_field_specifier_t; + +typedef struct { + u32 e_id_length; +} ipfix_field_specifier_t; + +static inline u32 ipfix_e_id_length (int e, u16 id, u16 length) +{ + u32 value; + value = (e<<31) | ((id&0x7FFF) <<16) | length; + return clib_host_to_net_u32 (value); +} + +/* + * Every Set contains a common header. This header is defined in + * Figure I. + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Set ID | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Figure I: Set Header Format + * + * Each Set Header field is exported in network format. The fields are + * defined as follows: + * + * Set ID + * + * Identifies the Set. A value of 2 is reserved for Template Sets. + * A value of 3 is reserved for Options Template Sets. Values from 4 + * to 255 are reserved for future use. Values 256 and above are used + * for Data Sets. The Set ID values of 0 and 1 are not used, for + * historical reasons [RFC3954]. + * + * Length + * + * Total length of the Set, in octets, including the Set Header, all + * records, and the optional padding. Because an individual Set MAY + * contain multiple records, the Length value MUST be used to + * determine the position of the next Set. + */ + +typedef struct { + u32 set_id_length; +} ipfix_set_header_t; + +static inline u32 ipfix_set_id_length (u16 set_id, u16 length) +{ + return clib_host_to_net_u32 ((set_id<<16) | length); +} + +/* + * The format of the Template Record is shown in Figure J. It consists + * of a Template Record Header and one or more Field Specifiers. Field + * Specifiers are defined in Figure G above. + * + * +--------------------------------------------------+ + * | Template Record Header | + * +--------------------------------------------------+ + * | Field Specifier | + * +--------------------------------------------------+ + * | Field Specifier | + * +--------------------------------------------------+ + * ... + * +--------------------------------------------------+ + * | Field Specifier | + * +--------------------------------------------------+ + * + * Figure J: Template Record Format + * + * The format of the Template Record Header is shown in Figure K. + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Template ID (> 255) | Field Count | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Figure K: Template Record Header Format + * + * The Template Record Header Field definitions are as follows: + * + * Template ID + * + * Each Template Record is given a unique Template ID in the range + * 256 to 65535. This uniqueness is local to the Transport Session + * and Observation Domain that generated the Template ID. 
Since + * Template IDs are used as Set IDs in the Sets they describe (see + * Section 3.4.3), values 0-255 are reserved for special Set types + * (e.g., Template Sets themselves), and Templates and Options + * Templates (see Section 3.4.2) cannot share Template IDs within a + * Transport Session and Observation Domain. There are no + * constraints regarding the order of the Template ID allocation. As + * Exporting Processes are free to allocate Template IDs as they see + * fit, Collecting Processes MUST NOT assume incremental Template + * IDs, or anything about the contents of a Template based on its + * Template ID alone. + * + * Field Count + * + * Number of fields in this Template Record. + */ + +typedef struct { + u32 id_count; +} ipfix_template_header_t; + +static inline u32 ipfix_id_count (u16 id, u16 count) +{ + return clib_host_to_net_u32 ((id<<16) | count); +} + +/* Template packet */ +typedef struct { + ipfix_message_header_t h; + ipfix_set_header_t s; + ipfix_template_header_t t; + ipfix_field_specifier_t fields[0]; +} ipfix_template_packet_t; + +#endif /* __included_ipfix_packet_h__ */ diff --git a/vnet/vnet/global_funcs.h b/vnet/vnet/global_funcs.h new file mode 100644 index 00000000000..3958d88fc1e --- /dev/null +++ b/vnet/vnet/global_funcs.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * global_funcs.h: global data structure access functions + */ + +#ifndef included_vnet_global_funcs_h_ +#define included_vnet_global_funcs_h_ + +vnet_main_t * vnet_get_main (void); + +#endif /* included_vnet_global_funcs_h_ */ diff --git a/vnet/vnet/gre/error.def b/vnet/vnet/gre/error.def new file mode 100644 index 00000000000..161ecc1d874 --- /dev/null +++ b/vnet/vnet/gre/error.def @@ -0,0 +1,23 @@ +/* + * gre_error.def: gre errors + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +gre_error (NONE, "no error") +gre_error (UNKNOWN_PROTOCOL, "unknown protocol") +gre_error (UNSUPPORTED_VERSION, "unsupported version") +gre_error (PKTS_DECAP, "GRE input packets decapsulated") +gre_error (PKTS_ENCAP, "GRE output packets encapsulated") +gre_error (NO_SUCH_TUNNEL, "GRE input packets dropped due to missing tunnel") diff --git a/vnet/vnet/gre/gre.c b/vnet/vnet/gre/gre.c new file mode 100644 index 00000000000..c09816a2962 --- /dev/null +++ b/vnet/vnet/gre/gre.c @@ -0,0 +1,512 @@ +/* + * gre.c: gre + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/gre/gre.h> + +gre_main_t gre_main; + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; + gre_header_t gre; +}) ip4_and_gre_header_t; + +typedef struct { + union { + ip4_and_gre_header_t ip4_and_gre; + u64 as_u64[3]; + }; +} ip4_and_gre_union_t; + + +/* Packet trace structure */ +typedef struct { + /* Tunnel-id / index in tunnel vector */ + u32 tunnel_id; + + /* pkt length */ + u32 length; + + /* tunnel ip4 addresses */ + ip4_address_t src; + ip4_address_t dst; +} gre_tx_trace_t; + +u8 * format_gre_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + gre_tx_trace_t * t = va_arg (*args, gre_tx_trace_t *); + + s = format (s, "GRE: tunnel %d len %d src %U dst %U", + t->tunnel_id, clib_net_to_host_u16 (t->length), + format_ip4_address, &t->src.as_u8, + format_ip4_address, &t->dst.as_u8); + return s; +} + +u8 * format_gre_protocol (u8 * s, va_list * args) +{ + gre_protocol_t p = va_arg (*args, u32); + gre_main_t * gm = &gre_main; + gre_protocol_info_t * pi = gre_get_protocol_info (gm, p); + + if (pi) + s = format (s, "%s", pi->name); + else + s = format (s, "0x%04x", p); + + return s; +} + +u8 * format_gre_header_with_length (u8 * s, va_list * args) +{ + gre_main_t * gm = &gre_main; + gre_header_t * h = va_arg (*args, gre_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + gre_protocol_t p = clib_net_to_host_u16 (h->protocol); + uword indent, header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "gre header truncated"); + + indent = format_get_indent (s); + + s = format (s, "GRE %U", format_gre_protocol, p); + + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + { + gre_protocol_info_t * pi = gre_get_protocol_info (gm, p); + vlib_node_t * node = vlib_get_node (gm->vlib_main, pi->node_index); + if (node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) (h + 1), + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * format_gre_header (u8 * s, va_list * args) +{ + gre_header_t * h = va_arg (*args, gre_header_t *); + return format (s, "%U", format_gre_header_with_length, h, 0); +} + +/* Returns gre protocol as an int in host byte order. 
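+ * (e.g. the named type "ip4" would come back as 0x0800); the
+ * _net_byte_order variant below byte-swaps the result.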
*/ +uword +unformat_gre_protocol_host_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + gre_main_t * gm = &gre_main; + int i; + + /* Named type. */ + if (unformat_user (input, unformat_vlib_number_by_name, + gm->protocol_info_by_name, &i)) + { + gre_protocol_info_t * pi = vec_elt_at_index (gm->protocol_infos, i); + *result = pi->protocol; + return 1; + } + + return 0; +} + +uword +unformat_gre_protocol_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + if (! unformat_user (input, unformat_gre_protocol_host_byte_order, result)) + return 0; + *result = clib_host_to_net_u16 ((u16) *result); + return 1; +} + +uword +unformat_gre_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + gre_header_t _h, * h = &_h; + u16 p; + + if (! unformat (input, "%U", + unformat_gre_protocol_host_byte_order, &p)) + return 0; + + h->protocol = clib_host_to_net_u16 (p); + + /* Add header to result. */ + { + void * p; + u32 n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, n_bytes); + memcpy (p, h, n_bytes); + } + + return 1; +} + +static uword gre_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + /* + * Conundrum: packets from tun/tap destined for the tunnel + * actually have this rewrite applied. Transit packets do not. + * To make the two cases equivalent, don't generate a + * rewrite here, build the entire header in the fast path. + */ + return 0; + +#ifdef THINGS_WORKED_AS_ONE_MIGHT_LIKE + ip4_and_gre_header_t * h = rewrite; + gre_protocol_t protocol; + + if (max_rewrite_bytes < sizeof (h[0])) + return 0; + + switch (l3_type) { +#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = GRE_PROTOCOL_##b; break + _ (IP4, ip4); + _ (IP6, ip6); +#undef _ + default: + return 0; + } + + memset (h, 0, sizeof (*h)); + h->ip4.ip_version_and_header_length = 0x45; + h->ip4.ttl = 64; + h->ip4.protocol = IP_PROTOCOL_GRE; + h->gre.protocol = clib_host_to_net_u16 (protocol); + + return sizeof (h[0]); +#endif +} + +static uword +gre_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + gre_main_t * gm = &gre_main; + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; + gre_tunnel_t *t = pool_elt_at_index (gm->tunnels, rd->dev_instance); + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * As long as we have enough pkts left to process two pkts + * and prefetch two pkts... 
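Before the transmit path below, a short usage sketch of the unformat helpers defined above (the input string is illustrative, and gre_init is assumed to have run so the protocol name table exists):

    u16 proto;
    unformat_input_t in;

    unformat_init_string (&in, "ip4", 3);
    if (unformat (&in, "%U", unformat_gre_protocol_host_byte_order, &proto))
      ASSERT (proto == GRE_PROTOCOL_ip4);      /* 0x0800, host byte order */
    unformat_free (&in);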
+ */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * b0, * b1; + ip4_header_t * ip0, * ip1; + ip4_and_gre_union_t * h0, * h1; + u32 bi0, next0, bi1, next1; + __attribute__((unused)) u8 error0, error1; + u16 gre_protocol0, gre_protocol1; + + /* Prefetch the next iteration */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* + * Prefetch packet data. We expect to overwrite + * the inbound L2 header with an ip header and a + * gre header. Might want to prefetch the last line + * of rewrite space as well; need profile data + */ + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* Pick up the next two buffer indices */ + bi0 = from[0]; + bi1 = from[1]; + + /* Speculatively enqueue them where we sent the last buffer */ + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = t->outer_fib_index; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = t->outer_fib_index; + + ip0 = vlib_buffer_get_current (b0); + gre_protocol0 = clib_net_to_host_u16 (0x800); + gre_protocol0 = + ((ip0->ip_version_and_header_length & 0xF0) == 0x60) ? + 0x86DD : gre_protocol0; + + ip1 = vlib_buffer_get_current (b1); + gre_protocol1 = clib_net_to_host_u16 (0x800); + gre_protocol1 = + ((ip1->ip_version_and_header_length & 0xF0) == 0x60) ? + 0x86DD : gre_protocol1; + + vlib_buffer_advance (b0, -sizeof(*h0)); + vlib_buffer_advance (b1, -sizeof(*h1)); + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + h0->as_u64[0] = 0; + h0->as_u64[1] = 0; + h0->as_u64[2] = 0; + + h1->as_u64[0] = 0; + h1->as_u64[1] = 0; + h1->as_u64[2] = 0; + + ip0 = &h0->ip4_and_gre.ip4; + h0->ip4_and_gre.gre.protocol = gre_protocol0; + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_GRE; + + ip1 = &h1->ip4_and_gre.ip4; + h1->ip4_and_gre.gre.protocol = gre_protocol1; + ip1->ip_version_and_header_length = 0x45; + ip1->ttl = 254; + ip1->protocol = IP_PROTOCOL_GRE; + + ip0->length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + ip1->length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + ip0->src_address.as_u32 = t->tunnel_src.as_u32; + ip1->src_address.as_u32 = t->tunnel_src.as_u32; + ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; + ip1->dst_address.as_u32 = t->tunnel_dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + ip1->checksum = ip4_header_checksum (ip1); + + /* ip4_lookup will route to the tunnel partner */ + next0 = GRE_OUTPUT_NEXT_LOOKUP; + next1 = GRE_OUTPUT_NEXT_LOOKUP; + error0 = GRE_ERROR_NONE; + error1 = GRE_ERROR_NONE; + + /* + * Enqueue 2 pkts. This macro deals with next0 != next1, + * acquiring enqueue rights to the indicated next + * node input frame, etc. 
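The per-packet work in the loop above condenses to the following sketch (single packet shown; the dual-packet unrolling and exact byte-order handling are elided):

    /* Sketch of one packet's GRE encapsulation as performed above. */
    int inner_is_ip6 = (ip0->ip_version_and_header_length & 0xF0) == 0x60;
    /* GRE protocol field: 0x86DD when the inner packet is IPv6, 0x0800 otherwise. */

    vlib_buffer_advance (b0, -sizeof (ip4_and_gre_union_t));   /* prepend 20 + 4 bytes */
    h0 = vlib_buffer_get_current (b0);
    h0->as_u64[0] = h0->as_u64[1] = h0->as_u64[2] = 0;         /* zero all 24 bytes */

    /* Then fill in: gre.protocol per the check above; outer IPv4 src/dst from the
       tunnel, TTL 254, per-packet total length and checksum; finally the buffer is
       handed to ip4-lookup via GRE_OUTPUT_NEXT_LOOKUP. */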
+ */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + ip4_header_t * ip0; + ip4_and_gre_union_t * h0; + u32 bi0, next0; + __attribute__((unused)) u8 error0; + u16 gre_protocol0; + + bi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = t->outer_fib_index; + ip0 = vlib_buffer_get_current (b0); + gre_protocol0 = clib_net_to_host_u16 (0x800); + gre_protocol0 = + ((ip0->ip_version_and_header_length & 0xF0) == 0x60) ? + 0x86DD : gre_protocol0; + + vlib_buffer_advance (b0, -sizeof(*h0)); + + h0 = vlib_buffer_get_current (b0); + h0->as_u64[0] = 0; + h0->as_u64[1] = 0; + h0->as_u64[2] = 0; + + ip0 = &h0->ip4_and_gre.ip4; + h0->ip4_and_gre.gre.protocol = gre_protocol0; + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_GRE; + ip0->length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + ip0->src_address.as_u32 = t->tunnel_src.as_u32; + ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + + next0 = GRE_OUTPUT_NEXT_LOOKUP; + error0 = GRE_ERROR_NONE; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = t - gm->tunnels; + tr->length = ip0->length; + tr->src.as_u32 = ip0->src_address.as_u32; + tr->dst.as_u32 = ip0->dst_address.as_u32; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, gre_input_node.index, + GRE_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} + +static u8 * format_gre_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "gre%d", dev_instance); +} + +static u8 * format_gre_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + s = format (s, "GRE tunnel: id %d\n", dev_instance); + return s; +} + +VNET_DEVICE_CLASS (gre_device_class) = { + .name = "GRE tunnel device", + .format_device_name = format_gre_tunnel_name, + .format_device = format_gre_device, + .format_tx_trace = format_gre_tx_trace, + .tx_function = gre_interface_tx, +#ifdef SOON + .clear counter = 0; + .admin_up_down_function = 0; +#endif +}; + + +VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = { + .name = "GRE", + .format_header = format_gre_header_with_length, + .unformat_header = unformat_gre_header, + .set_rewrite = gre_set_rewrite, +}; + +static void add_protocol (gre_main_t * gm, + gre_protocol_t protocol, + char * protocol_name) +{ + gre_protocol_info_t * pi; + u32 i; + + vec_add2 (gm->protocol_infos, pi, 1); + i = pi - gm->protocol_infos; + + pi->name = protocol_name; + pi->protocol = protocol; + pi->next_index = pi->node_index = ~0; + + hash_set (gm->protocol_info_by_protocol, protocol, i); + hash_set_mem (gm->protocol_info_by_name, pi->name, i); +} + +static clib_error_t * gre_init (vlib_main_t * vm) +{ + gre_main_t * gm = &gre_main; + clib_error_t * error; + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi; + + memset (gm, 0, sizeof (gm[0])); + gm->vlib_main = vm; + gm->vnet_main = vnet_get_main(); + + if ((error = vlib_call_init_function (vm, 
ip_main_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + + /* Set up the ip packet generator */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_GRE); + pi->format_header = format_gre_header; + pi->unformat_pg_edit = unformat_pg_gre_header; + + gm->protocol_info_by_name = hash_create_string (0, sizeof (uword)); + gm->protocol_info_by_protocol = hash_create (0, sizeof (uword)); + gm->tunnel_by_key = hash_create (0, sizeof (uword)); + +#define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s); + foreach_gre_protocol +#undef _ + + return vlib_call_init_function (vm, gre_input_init); +} + +VLIB_INIT_FUNCTION (gre_init); + +gre_main_t * gre_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, gre_init); + return &gre_main; +} + diff --git a/vnet/vnet/gre/gre.h b/vnet/vnet/gre/gre.h new file mode 100644 index 00000000000..c0689f60ddf --- /dev/null +++ b/vnet/vnet/gre/gre.h @@ -0,0 +1,118 @@ +/* + * gre.h: types/functions for gre. + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_gre_h +#define included_gre_h + +#include <vnet/vnet.h> +#include <vnet/gre/packet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip4.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/pg/pg.h> +#include <vnet/ip/format.h> + +vnet_hw_interface_class_t gre_hw_interface_class; + +typedef enum { +#define gre_error(n,s) GRE_ERROR_##n, +#include <vnet/gre/error.def> +#undef gre_error + GRE_N_ERROR, +} gre_error_t; + +typedef struct { + /* Name (a c string). */ + char * name; + + /* GRE protocol type in host byte order. */ + gre_protocol_t protocol; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} gre_protocol_info_t; + +typedef struct { + ip4_address_t tunnel_src; + ip4_address_t tunnel_dst; + u32 outer_fib_index; + u32 hw_if_index; +} gre_tunnel_t; + +typedef struct { + /* pool of tunnel instances */ + gre_tunnel_t *tunnels; + + gre_protocol_info_t * protocol_infos; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword * protocol_info_by_name, * protocol_info_by_protocol; + /* Hash mapping src/dst addr pair to tunnel */ + uword * tunnel_by_key; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} gre_main_t; + +always_inline gre_protocol_info_t * +gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol) +{ + uword * p = hash_get (em->protocol_info_by_protocol, protocol); + return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0; +} + +gre_main_t gre_main; + +/* Register given node index to take input for given gre type. 
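A hypothetical payload node would use gre_register_input_protocol, declared further down in this header and defined in node.c later in the diff, roughly like this (sketch; gre_input_init does exactly this for ip4-input, ip6-input and mpls-gre-input):

    /* Sketch: steer GRE payloads carrying protocol 0x0800 to ip4-input. */
    static clib_error_t *
    example_register_gre_payload (vlib_main_t * vm)
    {
      vlib_node_t * n = vlib_get_node_by_name (vm, (u8 *) "ip4-input");
      gre_register_input_protocol (vm, GRE_PROTOCOL_ip4, n->index);
      return 0;
    }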
*/ +void +gre_register_input_type (vlib_main_t * vm, + gre_protocol_t protocol, + u32 node_index); + +void gre_set_adjacency (vnet_rewrite_header_t * rw, + uword max_data_bytes, + gre_protocol_t protocol); + +format_function_t format_gre_protocol; +format_function_t format_gre_header; +format_function_t format_gre_header_with_length; + +vlib_node_registration_t gre_input_node; +vnet_device_class_t gre_device_class; + +/* Parse gre protocol as 0xXXXX or protocol name. + In either host or network byte order. */ +unformat_function_t unformat_gre_protocol_host_byte_order; +unformat_function_t unformat_gre_protocol_net_byte_order; + +/* Parse gre header. */ +unformat_function_t unformat_gre_header; +unformat_function_t unformat_pg_gre_header; + +void +gre_register_input_protocol (vlib_main_t * vm, + gre_protocol_t protocol, + u32 node_index); + +/* manually added to the interface output node in gre.c */ +#define GRE_OUTPUT_NEXT_LOOKUP 1 + +#endif /* included_gre_h */ diff --git a/vnet/vnet/gre/interface.c b/vnet/vnet/gre/interface.c new file mode 100644 index 00000000000..72ce0962fc7 --- /dev/null +++ b/vnet/vnet/gre/interface.c @@ -0,0 +1,150 @@ +/* + * gre_interface.c: gre interfaces + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> + +int +gre_register_interface (vnet_main_t * vnm, + u32 dev_class_index, + ip4_address_t *tunnel_src, + ip4_address_t *tunnel_dst, + u32 outer_fib_id, + u32 * gi_index_return) +{ + gre_main_t * gm = &gre_main; + ip4_main_t * im = &ip4_main; + gre_tunnel_t * t; + vnet_hw_interface_t * hi; + u32 hw_if_index; + u32 slot; + u32 outer_fib_index; + uword * p; + + u64 key = (u64)tunnel_src->as_u32 << 32 | (u64)tunnel_dst->as_u32; + + /* check if same src/dst pair exists */ + if (hash_get (gm->tunnel_by_key, key)) + return VNET_API_ERROR_INVALID_VALUE; + + p = hash_get (im->fib_index_by_table_id, outer_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + + outer_fib_index = p[0]; + + pool_get (gm->tunnels, t); + memset (t, 0, sizeof (*t)); + + hw_if_index = vnet_register_interface + (vnm, gre_device_class.index, t - gm->tunnels, + gre_hw_interface_class.index, + t - gm->tunnels); + + *gi_index_return = t - gm->tunnels; + + t->hw_if_index = hw_if_index; + t->outer_fib_index = outer_fib_index; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + + hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t); + hi->per_packet_overhead_bytes = + /* preamble */ 8 + /* inter frame gap */ 12; + + /* Standard default gre MTU. 
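A note on the 64-bit tunnel key built near the top of gre_register_interface above: the receive path (gre-input, later in this diff) rebuilds the same key from the outer header with the packet's destination address in the upper half, so both sides hash to the same tunnel entry. A sketch of the shared shape:

    /* registration side:  key = tunnel_src (local) << 32 | tunnel_dst (remote)
       gre-input rx side:  key = outer dst          << 32 | outer src
       For packets arriving on the tunnel, outer dst == tunnel_src and
       outer src == tunnel_dst, so the two keys are identical. */
    static inline u64
    example_gre_tunnel_key (u32 upper_as_u32, u32 lower_as_u32)
    {
      return ((u64) upper_as_u32 << 32) | (u64) lower_as_u32;
    }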
*/ + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1500; + + memcpy (&t->tunnel_src, tunnel_src, sizeof (t->tunnel_src)); + memcpy (&t->tunnel_dst, tunnel_dst, sizeof (t->tunnel_dst)); + + hash_set (gm->tunnel_by_key, key, t - gm->tunnels); + + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, "ip4-lookup", GRE_OUTPUT_NEXT_LOOKUP); + + ASSERT (slot == GRE_OUTPUT_NEXT_LOOKUP); + + return 0; +} + + +static clib_error_t * +create_gre_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t src, dst; + u32 outer_fib_id = 0; + int rv; + u32 gi_index; + u32 num_m_args = 0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "src %U", unformat_ip4_address, &src)) + num_m_args++; + else if (unformat (line_input, "dst %U", unformat_ip4_address, &dst)) + num_m_args++; + else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (num_m_args < 2) + return clib_error_return (0, "mandatory argument(s) missing"); + + rv = gre_register_interface (vnm, gre_hw_interface_class.index, + &src, &dst, outer_fib_id, &gi_index); + + switch(rv) + { + case 0: + break; + case VNET_API_ERROR_INVALID_VALUE: + return clib_error_return (0, "GRE tunnel already exists..."); + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "outer fib ID %d doesn't exist\n", + outer_fib_id); + default: + return clib_error_return (0, "gre_register_interface returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = { + .path = "create gre tunnel", + .short_help = "create gre tunnel src <addr> dst <addr> [outer-fib-id <fib>]", + .function = create_gre_tunnel_command_fn, +}; + +/* force inclusion from application's main.c */ +clib_error_t *gre_interface_init (vlib_main_t *vm) +{ + return 0; +} +VLIB_INIT_FUNCTION(gre_interface_init); diff --git a/vnet/vnet/gre/node.c b/vnet/vnet/gre/node.c new file mode 100644 index 00000000000..7d07223fc71 --- /dev/null +++ b/vnet/vnet/gre/node.c @@ -0,0 +1,533 @@ +/* + * node.c: gre packet processing + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
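For reference, the CLI command registered above (create_gre_tunnel_command) is invoked as, for example (addresses and FIB id are illustrative):

    create gre tunnel src 192.168.1.1 dst 192.168.1.2 outer-fib-id 0

outer-fib-id is optional and defaults to 0; re-adding an existing src/dst pair is rejected with the "GRE tunnel already exists" error, and an unknown outer FIB id returns VNET_API_ERROR_NO_SUCH_FIB.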
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> +#include <vppinfra/sparse_vec.h> + +#define foreach_gre_input_next \ +_(PUNT, "error-punt") \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") + +typedef enum { +#define _(s,n) GRE_INPUT_NEXT_##s, + foreach_gre_input_next +#undef _ + GRE_INPUT_N_NEXT, +} gre_input_next_t; + +typedef struct { + u32 tunnel_id; + u32 length; + ip4_address_t src; + ip4_address_t dst; +} gre_rx_trace_t; + +u8 * format_gre_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + gre_rx_trace_t * t = va_arg (*args, gre_rx_trace_t *); + + s = format (s, "GRE: tunnel %d len %d src %U dst %U", + t->tunnel_id, clib_net_to_host_u16(t->length), + format_ip4_address, &t->src.as_u8, + format_ip4_address, &t->dst.as_u8); + return s; +} + +typedef struct { + /* Sparse vector mapping gre protocol in network byte order + to next index. */ + u16 * next_by_protocol; + + u32 * sparse_index_by_next_index; +} gre_input_runtime_t; + +static uword +gre_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + gre_main_t * gm = &gre_main; + gre_input_runtime_t * rt = (void *) node->runtime_data; + __attribute__((unused)) u32 n_left_from, next_index, i_next, * from, * to_next; + u64 cached_tunnel_key = (u64) ~0; + u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index; + u32 cached_tunnel_fib_index = 0, tunnel_fib_index; + + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + i_next = vec_elt (rt->sparse_index_by_next_index, next_index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + gre_header_t * h0, * h1; + u16 version0, version1; + int verr0, verr1; + u32 i0, i1, next0, next1, protocol0, protocol1; + ip4_header_t *ip0, *ip1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* ip4_local hands us the ip header, not the gre header */ + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Save src + dst ip4 address, e.g. for mpls-o-gre */ + vnet_buffer(b0)->gre.src = ip0->src_address.as_u32; + vnet_buffer(b0)->gre.dst = ip0->dst_address.as_u32; + vnet_buffer(b1)->gre.src = ip1->src_address.as_u32; + vnet_buffer(b1)->gre.dst = ip1->dst_address.as_u32; + + vlib_buffer_advance (b0, sizeof (*ip0)); + vlib_buffer_advance (b1, sizeof (*ip1)); + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + /* Index sparse array with network byte order. 
*/ + protocol0 = h0->protocol; + protocol1 = h1->protocol; + sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1, + &i0, &i1); + next0 = vec_elt(rt->next_by_protocol, i0); + next1 = vec_elt(rt->next_by_protocol, i1); + + b0->error = node->errors[next0 == SPARSE_VEC_INVALID_INDEX ? GRE_ERROR_UNKNOWN_PROTOCOL : GRE_ERROR_NONE]; + b1->error = node->errors[next1 == SPARSE_VEC_INVALID_INDEX ? GRE_ERROR_UNKNOWN_PROTOCOL : GRE_ERROR_NONE]; + + version0 = clib_net_to_host_u16 (h0->flags_and_version); + verr0 = version0 & GRE_VERSION_MASK; + version1 = clib_net_to_host_u16 (h1->flags_and_version); + verr1 = version1 & GRE_VERSION_MASK; + + b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] + : b0->error; + next0 = verr0 ? GRE_INPUT_NEXT_DROP : next0; + b1->error = verr1 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] + : b1->error; + next1 = verr1 ? GRE_INPUT_NEXT_DROP : next1; + + /* RPF check for ip4/ip6 input */ + if (PREDICT_FALSE(next0 == GRE_INPUT_NEXT_IP4_INPUT + || next0 == GRE_INPUT_NEXT_IP6_INPUT)) + { + u64 key = ((u64)(vnet_buffer(b0)->gre.dst) << 32) | + (u64)(vnet_buffer(b0)->gre.src); + + if (cached_tunnel_key != key) + { + vnet_hw_interface_t * hi; + gre_tunnel_t * t; + uword * p; + + ip4_main_t * ip4m = &ip4_main; + p = hash_get (gm->tunnel_by_key, key); + if (!p) + { + next0 = GRE_INPUT_NEXT_DROP; + b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL]; + goto drop0; + } + t = pool_elt_at_index (gm->tunnels, p[0]); + hi = vnet_get_hw_interface (gm->vnet_main, + t->hw_if_index); + tunnel_sw_if_index = hi->sw_if_index; + tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index, + tunnel_sw_if_index); + + cached_tunnel_sw_if_index = tunnel_sw_if_index; + cached_tunnel_fib_index = tunnel_fib_index; + } + else + { + tunnel_sw_if_index = cached_tunnel_sw_if_index; + tunnel_fib_index = cached_tunnel_fib_index; + } + + u32 len = vlib_buffer_length_in_chain (vm, b0); + vnet_interface_main_t *im = &gm->vnet_main->interface_main; + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + tunnel_sw_if_index, + 1 /* packets */, + len /* bytes */); + + vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index; + } + +drop0: + if (PREDICT_FALSE(next1 == GRE_INPUT_NEXT_IP4_INPUT + || next1 == GRE_INPUT_NEXT_IP6_INPUT)) + { + u64 key = ((u64)(vnet_buffer(b1)->gre.dst) << 32) | + (u64)(vnet_buffer(b1)->gre.src); + + if (cached_tunnel_key != key) + { + vnet_hw_interface_t * hi; + gre_tunnel_t * t; + uword * p; + + ip4_main_t * ip4m = &ip4_main; + p = hash_get (gm->tunnel_by_key, key); + if (!p) + { + next1 = GRE_INPUT_NEXT_DROP; + b1->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL]; + goto drop1; + } + t = pool_elt_at_index (gm->tunnels, p[0]); + hi = vnet_get_hw_interface (gm->vnet_main, + t->hw_if_index); + tunnel_sw_if_index = hi->sw_if_index; + tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index, + tunnel_sw_if_index); + + cached_tunnel_sw_if_index = tunnel_sw_if_index; + cached_tunnel_fib_index = tunnel_fib_index; + } + else + { + tunnel_sw_if_index = cached_tunnel_sw_if_index; + tunnel_fib_index = cached_tunnel_fib_index; + } + + u32 len = vlib_buffer_length_in_chain (vm, b1); + vnet_interface_main_t *im = &gm->vnet_main->interface_main; + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + tunnel_sw_if_index, + 1 /* packets */, + len /* bytes */); + + vnet_buffer(b1)->sw_if_index[VLIB_TX] = tunnel_fib_index; + } +drop1: + if (PREDICT_FALSE(b0->flags & 
VLIB_BUFFER_IS_TRACED)) + { + gre_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = ~0; + tr->length = ip0->length; + tr->src.as_u32 = ip0->src_address.as_u32; + tr->dst.as_u32 = ip0->dst_address.as_u32; + } + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + gre_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->tunnel_id = ~0; + tr->length = ip1->length; + tr->src.as_u32 = ip1->src_address.as_u32; + tr->dst.as_u32 = ip1->dst_address.as_u32; + } + + vlib_buffer_advance (b0, sizeof (*h0)); + vlib_buffer_advance (b1, sizeof (*h1)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + gre_header_t * h0; + ip4_header_t * ip0; + u16 version0; + int verr0; + u32 i0, next0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + vnet_buffer(b0)->gre.src = ip0->src_address.as_u32; + vnet_buffer(b0)->gre.dst = ip0->dst_address.as_u32; + + vlib_buffer_advance (b0, sizeof (*ip0)); + + h0 = vlib_buffer_get_current (b0); + + i0 = sparse_vec_index (rt->next_by_protocol, h0->protocol); + next0 = vec_elt(rt->next_by_protocol, i0); + + b0->error = + node->errors[next0 == SPARSE_VEC_INVALID_INDEX + ? GRE_ERROR_UNKNOWN_PROTOCOL : GRE_ERROR_NONE]; + + version0 = clib_net_to_host_u16 (h0->flags_and_version); + verr0 = version0 & GRE_VERSION_MASK; + b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] + : b0->error; + next0 = verr0 ? GRE_INPUT_NEXT_DROP : next0; + + /* For IP payload we need to find source interface + so we can increase counters and help forward node to + pick right FIB */ + if (PREDICT_FALSE(next0 == GRE_INPUT_NEXT_IP4_INPUT + || next0 == GRE_INPUT_NEXT_IP6_INPUT)) + { + u64 key = ((u64)(vnet_buffer(b0)->gre.dst) << 32) | + (u64)(vnet_buffer(b0)->gre.src); + + if (cached_tunnel_key != key) + { + vnet_hw_interface_t * hi; + gre_tunnel_t * t; + uword * p; + + ip4_main_t * ip4m = &ip4_main; + p = hash_get (gm->tunnel_by_key, key); + if (!p) + { + next0 = GRE_INPUT_NEXT_DROP; + b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL]; + goto drop; + } + t = pool_elt_at_index (gm->tunnels, p[0]); + hi = vnet_get_hw_interface (gm->vnet_main, + t->hw_if_index); + tunnel_sw_if_index = hi->sw_if_index; + tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index, + tunnel_sw_if_index); + + cached_tunnel_sw_if_index = tunnel_sw_if_index; + cached_tunnel_fib_index = tunnel_fib_index; + } + else + { + tunnel_sw_if_index = cached_tunnel_sw_if_index; + tunnel_fib_index = cached_tunnel_fib_index; + } + + u32 len = vlib_buffer_length_in_chain (vm, b0); + vnet_interface_main_t *im = &gm->vnet_main->interface_main; + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + tunnel_sw_if_index, + 1 /* packets */, + len /* bytes */); + + vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index; + } + +drop: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + gre_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = ~0; + tr->length = ip0->length; + tr->src.as_u32 = ip0->src_address.as_u32; + tr->dst.as_u32 = ip0->dst_address.as_u32; + } + + vlib_buffer_advance (b0, sizeof (*h0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); 
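The tunnel resolution repeated in both loops above condenses to this helper-shaped sketch (not a function that exists in this diff; error handling and the one-entry key cache are omitted):

    /* Map the outer-address key to the tunnel's sw_if_index (for RX counters)
       and its IPv4 FIB index (stored in sw_if_index[VLIB_TX] for ip4/ip6-input). */
    static inline u32
    example_gre_rx_resolve (gre_main_t * gm, u64 key, u32 * sw_if_index)
    {
      uword * p = hash_get (gm->tunnel_by_key, key);   /* 0 => GRE_ERROR_NO_SUCH_TUNNEL */
      gre_tunnel_t * t = pool_elt_at_index (gm->tunnels, p[0]);
      vnet_hw_interface_t * hi = vnet_get_hw_interface (gm->vnet_main, t->hw_if_index);

      *sw_if_index = hi->sw_if_index;
      return vec_elt (ip4_main.fib_index_by_sw_if_index, hi->sw_if_index);
    }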
+ } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, gre_input_node.index, + GRE_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static char * gre_error_strings[] = { +#define gre_error(n,s) s, +#include "error.def" +#undef gre_error +}; + +VLIB_REGISTER_NODE (gre_input_node) = { + .function = gre_input, + .name = "gre-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (gre_input_runtime_t), + + .n_errors = GRE_N_ERROR, + .error_strings = gre_error_strings, + + .n_next_nodes = GRE_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [GRE_INPUT_NEXT_##s] = n, + foreach_gre_input_next +#undef _ + }, + + .format_buffer = format_gre_header_with_length, + .format_trace = format_gre_rx_trace, + .unformat_buffer = unformat_gre_header, +}; + +void +gre_register_input_protocol (vlib_main_t * vm, + gre_protocol_t protocol, + u32 node_index) +{ + gre_main_t * em = &gre_main; + gre_protocol_info_t * pi; + gre_input_runtime_t * rt; + u16 * n; + u32 i; + + { + clib_error_t * error = vlib_call_init_function (vm, gre_input_init); + if (error) + clib_error_report (error); + } + + pi = gre_get_protocol_info (em, protocol); + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + gre_input_node.index, + node_index); + + /* Setup gre protocol -> next index sparse vector mapping. */ + rt = vlib_node_get_runtime_data (vm, gre_input_node.index); + n = sparse_vec_validate (rt->next_by_protocol, + clib_host_to_net_u16 (protocol)); + n[0] = pi->next_index; + + /* Rebuild next index -> sparse index inverse mapping when sparse vector + is updated. */ + vec_validate (rt->sparse_index_by_next_index, pi->next_index); + for (i = 1; i < vec_len (rt->next_by_protocol); i++) + rt->sparse_index_by_next_index[rt->next_by_protocol[i]] = i; +} + +static void +gre_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + + n->format_buffer = format_gre_header_with_length; + n->unformat_buffer = unformat_gre_header; + pn->unformat_edit = unformat_pg_gre_header; +} + +static clib_error_t * gre_input_init (vlib_main_t * vm) +{ + gre_input_runtime_t * rt; + vlib_node_t *ip4_input, *ip6_input, *mpls_unicast_input; + + { + clib_error_t * error; + error = vlib_call_init_function (vm, gre_init); + if (error) + clib_error_report (error); + } + + gre_setup_node (vm, gre_input_node.index); + + rt = vlib_node_get_runtime_data (vm, gre_input_node.index); + + rt->next_by_protocol = sparse_vec_new + (/* elt bytes */ sizeof (rt->next_by_protocol[0]), + /* bits in index */ BITS (((gre_header_t *) 0)->protocol)); + + vec_validate (rt->sparse_index_by_next_index, GRE_INPUT_NEXT_DROP); + vec_validate (rt->sparse_index_by_next_index, GRE_INPUT_NEXT_PUNT); + rt->sparse_index_by_next_index[GRE_INPUT_NEXT_DROP] + = SPARSE_VEC_INVALID_INDEX; + rt->sparse_index_by_next_index[GRE_INPUT_NEXT_PUNT] + = SPARSE_VEC_INVALID_INDEX; + + /* These could be moved to the supported protocol input node defn's */ + ip4_input = vlib_get_node_by_name (vm, (u8 *)"ip4-input"); + ASSERT(ip4_input); + ip6_input = vlib_get_node_by_name (vm, (u8 *)"ip6-input"); + ASSERT(ip6_input); + mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *)"mpls-gre-input"); + ASSERT(mpls_unicast_input); + + gre_register_input_protocol (vm, GRE_PROTOCOL_ip4, + ip4_input->index); + + gre_register_input_protocol (vm, GRE_PROTOCOL_ip6, + ip6_input->index); + 
+ gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast, + mpls_unicast_input->index); + + ip4_register_protocol (IP_PROTOCOL_GRE, gre_input_node.index); + + return 0; +} + +VLIB_INIT_FUNCTION (gre_input_init); diff --git a/vnet/vnet/gre/packet.h b/vnet/vnet/gre/packet.h new file mode 100644 index 00000000000..573f2624bf8 --- /dev/null +++ b/vnet/vnet/gre/packet.h @@ -0,0 +1,54 @@ +#ifndef included_vnet_gre_packet_h +#define included_vnet_gre_packet_h + +/* + * GRE packet format + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define foreach_gre_protocol \ +_ (0x0800, ip4) \ +_ (0x86DD, ip6) \ +_ (0x0806, arp) \ +_ (0x8847, mpls_unicast) \ +_ (0x894F, nsh) + +typedef enum { +#define _(n,f) GRE_PROTOCOL_##f = n, + foreach_gre_protocol +#undef _ +} gre_protocol_t; + +typedef struct { + /* flags and version */ + u16 flags_and_version; + /* unimplemented at the moment */ +#define GRE_FLAGS_CHECKSUM (1 << 15) + + /* deprecated, according to rfc2784 */ +#define GRE_FLAGS_ROUTING (1 << 14) +#define GRE_FLAGS_KEY (1 << 13) +#define GRE_FLAGS_SEQUENCE (1 << 12) +#define GRE_FLAGS_STRICT_SOURCE_ROUTE (1 << 11) + + /* version 1 is PPTP which we don't support */ +#define GRE_SUPPORTED_VERSION 0 +#define GRE_VERSION_MASK 0x7 + + /* 0x800 for ip4, etc. */ + u16 protocol; +} gre_header_t; + +#endif /* included_vnet_gre_packet_h */ diff --git a/vnet/vnet/gre/pg.c b/vnet/vnet/gre/pg.c new file mode 100644 index 00000000000..cc065d3b6b5 --- /dev/null +++ b/vnet/vnet/gre/pg.c @@ -0,0 +1,77 @@ +/* + * hdlc_pg.c: packet generator gre interface + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> + +typedef struct { + pg_edit_t flags_and_version; + pg_edit_t protocol; +} pg_gre_header_t; + +static inline void +pg_gre_header_init (pg_gre_header_t * e) +{ + pg_edit_init (&e->flags_and_version, gre_header_t, flags_and_version); + pg_edit_init (&e->protocol, gre_header_t, protocol); +} + +uword +unformat_pg_gre_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_gre_header_t * h; + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (gre_header_t), + &group_index); + pg_gre_header_init (h); + + pg_edit_set_fixed (&h->flags_and_version, 0); + + error = 1; + if (! 
unformat (input, "%U", + unformat_pg_edit, + unformat_gre_protocol_net_byte_order, &h->protocol)) + goto done; + + { + gre_main_t * pm = &gre_main; + gre_protocol_info_t * pi = 0; + pg_node_t * pg_node = 0; + + if (h->protocol.type == PG_EDIT_FIXED) + { + u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO]; + pi = gre_get_protocol_info (pm, clib_net_to_host_u16 (t)); + if (pi && pi->node_index != ~0) + pg_node = pg_get_node (pi->node_index); + } + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/hdlc/error.def b/vnet/vnet/hdlc/error.def new file mode 100644 index 00000000000..16e001bbcfa --- /dev/null +++ b/vnet/vnet/hdlc/error.def @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * hdlc_error.def: hdlc errors + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +hdlc_error (NONE, "no error") +hdlc_error (UNKNOWN_PROTOCOL, "unknown hdlc protocol") +hdlc_error (UNKNOWN_ADDRESS_CONTROL, "address, control != 0x0f00") diff --git a/vnet/vnet/hdlc/hdlc.c b/vnet/vnet/hdlc/hdlc.c new file mode 100644 index 00000000000..bdbe89466b9 --- /dev/null +++ b/vnet/vnet/hdlc/hdlc.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * hdlc.c: hdlc + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/hdlc/hdlc.h> + +/* Global main structure. */ +hdlc_main_t hdlc_main; + +u8 * format_hdlc_protocol (u8 * s, va_list * args) +{ + hdlc_protocol_t p = va_arg (*args, u32); + hdlc_main_t * pm = &hdlc_main; + hdlc_protocol_info_t * pi = hdlc_get_protocol_info (pm, p); + + if (pi) + s = format (s, "%s", pi->name); + else + s = format (s, "0x%04x", p); + + return s; +} + +u8 * format_hdlc_header_with_length (u8 * s, va_list * args) +{ + hdlc_main_t * pm = &hdlc_main; + hdlc_header_t * h = va_arg (*args, hdlc_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + hdlc_protocol_t p = clib_net_to_host_u16 (h->protocol); + uword indent, header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "hdlc header truncated"); + + indent = format_get_indent (s); + + s = format (s, "HDLC %U", format_hdlc_protocol, p); + + if (h->address != 0xff) + s = format (s, ", address 0x%02x", h->address); + if (h->control != 0x03) + s = format (s, ", control 0x%02x", h->control); + + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + { + hdlc_protocol_info_t * pi = hdlc_get_protocol_info (pm, p); + vlib_node_t * node = vlib_get_node (pm->vlib_main, pi->node_index); + if (node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) (h + 1), + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * format_hdlc_header (u8 * s, va_list * args) +{ + hdlc_header_t * h = va_arg (*args, hdlc_header_t *); + return format (s, "%U", format_hdlc_header_with_length, h, 0); +} + +/* Returns hdlc protocol as an int in host byte order. */ +uword +unformat_hdlc_protocol_host_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + hdlc_main_t * pm = &hdlc_main; + int p, i; + + /* Numeric type. */ + if (unformat (input, "0x%x", &p) + || unformat (input, "%d", &p)) + { + if (p >= (1 << 16)) + return 0; + *result = p; + return 1; + } + + /* Named type. 
*/ + if (unformat_user (input, unformat_vlib_number_by_name, + pm->protocol_info_by_name, &i)) + { + hdlc_protocol_info_t * pi = vec_elt_at_index (pm->protocol_infos, i); + *result = pi->protocol; + return 1; + } + + return 0; +} + +uword +unformat_hdlc_protocol_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + if (! unformat_user (input, unformat_hdlc_protocol_host_byte_order, result)) + return 0; + *result = clib_host_to_net_u16 ((u16) *result); + return 1; +} + +uword +unformat_hdlc_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + hdlc_header_t _h, * h = &_h; + u16 p; + + if (! unformat (input, "%U", + unformat_hdlc_protocol_host_byte_order, &p)) + return 0; + + h->address = 0xff; + h->control = 0x03; + h->protocol = clib_host_to_net_u16 (p); + + /* Add header to result. */ + { + void * p; + u32 n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, n_bytes); + memcpy (p, h, n_bytes); + } + + return 1; +} + +static uword hdlc_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + hdlc_header_t * h = rewrite; + hdlc_protocol_t protocol; + + if (max_rewrite_bytes < sizeof (h[0])) + return 0; + + switch (l3_type) { +#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = HDLC_PROTOCOL_##b; break + _ (IP4, ip4); + _ (IP6, ip6); + _ (MPLS_UNICAST, mpls_unicast); + _ (MPLS_MULTICAST, mpls_multicast); +#undef _ + default: + return 0; + } + + h->address = 0x0f; + h->control = 0x00; + h->protocol = clib_host_to_net_u16 (protocol); + + return sizeof (h[0]); +} + +VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = { + .name = "HDLC", + .format_header = format_hdlc_header_with_length, + .unformat_header = unformat_hdlc_header, + .set_rewrite = hdlc_set_rewrite, +}; + +static void add_protocol (hdlc_main_t * pm, + hdlc_protocol_t protocol, + char * protocol_name) +{ + hdlc_protocol_info_t * pi; + u32 i; + + vec_add2 (pm->protocol_infos, pi, 1); + i = pi - pm->protocol_infos; + + pi->name = protocol_name; + pi->protocol = protocol; + pi->next_index = pi->node_index = ~0; + + hash_set (pm->protocol_info_by_protocol, protocol, i); + hash_set_mem (pm->protocol_info_by_name, pi->name, i); +} + +static clib_error_t * hdlc_init (vlib_main_t * vm) +{ + hdlc_main_t * pm = &hdlc_main; + + memset (pm, 0, sizeof (pm[0])); + pm->vlib_main = vm; + + pm->protocol_info_by_name = hash_create_string (0, sizeof (uword)); + pm->protocol_info_by_protocol = hash_create (0, sizeof (uword)); + +#define _(n,s) add_protocol (pm, HDLC_PROTOCOL_##s, #s); + foreach_hdlc_protocol +#undef _ + + return vlib_call_init_function (vm, hdlc_input_init); +} + +VLIB_INIT_FUNCTION (hdlc_init); + +hdlc_main_t * hdlc_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, hdlc_init); + return &hdlc_main; +} + diff --git a/vnet/vnet/hdlc/hdlc.h b/vnet/vnet/hdlc/hdlc.h new file mode 100644 index 00000000000..e5cbe62d33d --- /dev/null +++ b/vnet/vnet/hdlc/hdlc.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
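Back in hdlc_set_rewrite above, the 4-byte header produced for an IPv4 adjacency looks like this on the wire (sketch):

    /* address  control  protocol (network byte order)              */
    /*  0x0f     0x00     0x08 0x00   -> HDLC_PROTOCOL_ip4 (0x0800) */

The same address/control pair is what the UNKNOWN_ADDRESS_CONTROL error string in error.def ("address, control != 0x0f00") refers to.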
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * hdlc.h: types/functions for hdlc. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_hdlc_h +#define included_hdlc_h + +#include <vnet/vnet.h> +#include <vnet/hdlc/packet.h> +#include <vnet/pg/pg.h> + +vnet_hw_interface_class_t hdlc_hw_interface_class; + +typedef enum { +#define hdlc_error(n,s) HDLC_ERROR_##n, +#include <vnet/hdlc/error.def> +#undef hdlc_error + HDLC_N_ERROR, +} hdlc_error_t; + +typedef struct { + /* Name (a c string). */ + char * name; + + /* HDLC protocol type in host byte order. */ + hdlc_protocol_t protocol; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} hdlc_protocol_info_t; + +typedef struct { + vlib_main_t * vlib_main; + + hdlc_protocol_info_t * protocol_infos; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword * protocol_info_by_name, * protocol_info_by_protocol; +} hdlc_main_t; + +always_inline hdlc_protocol_info_t * +hdlc_get_protocol_info (hdlc_main_t * em, hdlc_protocol_t protocol) +{ + uword * p = hash_get (em->protocol_info_by_protocol, protocol); + return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0; +} + +extern hdlc_main_t hdlc_main; + +/* Register given node index to take input for given hdlc type. */ +void +hdlc_register_input_type (vlib_main_t * vm, + hdlc_protocol_t protocol, + u32 node_index); + +void hdlc_set_adjacency (vnet_rewrite_header_t * rw, + uword max_data_bytes, + hdlc_protocol_t protocol); + +format_function_t format_hdlc_protocol; +format_function_t format_hdlc_header; +format_function_t format_hdlc_header_with_length; + +/* Parse hdlc protocol as 0xXXXX or protocol name. + In either host or network byte order. */ +unformat_function_t unformat_hdlc_protocol_host_byte_order; +unformat_function_t unformat_hdlc_protocol_net_byte_order; + +/* Parse hdlc header. 
*/ +unformat_function_t unformat_hdlc_header; +unformat_function_t unformat_pg_hdlc_header; + +always_inline void +hdlc_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + + n->format_buffer = format_hdlc_header_with_length; + n->unformat_buffer = unformat_hdlc_header; + pn->unformat_edit = unformat_pg_hdlc_header; +} + +void +hdlc_register_input_protocol (vlib_main_t * vm, + hdlc_protocol_t protocol, + u32 node_index); + +#endif /* included_hdlc_h */ diff --git a/vnet/vnet/hdlc/node.c b/vnet/vnet/hdlc/node.c new file mode 100644 index 00000000000..4fe0296aca1 --- /dev/null +++ b/vnet/vnet/hdlc/node.c @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * hdlc_node.c: hdlc packet processing + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/hdlc/hdlc.h> +#include <vppinfra/sparse_vec.h> + +#define foreach_hdlc_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") + +typedef enum { +#define _(s,n) HDLC_INPUT_NEXT_##s, + foreach_hdlc_input_next +#undef _ + HDLC_INPUT_N_NEXT, +} hdlc_input_next_t; + +typedef struct { + u8 packet_data[32]; +} hdlc_input_trace_t; + +static u8 * format_hdlc_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + hdlc_input_trace_t * t = va_arg (*va, hdlc_input_trace_t *); + + s = format (s, "%U", format_hdlc_header, t->packet_data); + + return s; +} + +typedef struct { + /* Sparse vector mapping hdlc protocol in network byte order + to next index. 
*/ + u16 * next_by_protocol; + + u32 * sparse_index_by_next_index; +} hdlc_input_runtime_t; + +static uword +hdlc_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + hdlc_input_runtime_t * rt = (void *) node->runtime_data; + u32 n_left_from, next_index, i_next, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (hdlc_input_trace_t)); + + next_index = node->cached_next_index; + i_next = vec_elt (rt->sparse_index_by_next_index, next_index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + hdlc_header_t * h0, * h1; + u32 i0, i1, len0, len1, protocol0, protocol1, enqueue_code; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * b2, * b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, LOAD); + vlib_prefetch_buffer_header (b3, LOAD); + + CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = (void *) (b0->data + b0->current_data); + h1 = (void *) (b1->data + b1->current_data); + + protocol0 = h0->protocol; + protocol1 = h1->protocol; + + /* Add padding bytes for OSI protocols. */ + len0 = sizeof (h0[0]); + len1 = sizeof (h1[0]); + + len0 += protocol0 == clib_host_to_net_u16 (HDLC_PROTOCOL_osi); + len1 += protocol1 == clib_host_to_net_u16 (HDLC_PROTOCOL_osi); + + b0->current_data += len0; + b1->current_data += len1; + + b0->current_length -= len0; + b1->current_length -= len1; + + /* Index sparse array with network byte order. */ + sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1, &i0, &i1); + + b0->error = node->errors[i0 == SPARSE_VEC_INVALID_INDEX ? HDLC_ERROR_UNKNOWN_PROTOCOL : HDLC_ERROR_NONE]; + b1->error = node->errors[i1 == SPARSE_VEC_INVALID_INDEX ? 
HDLC_ERROR_UNKNOWN_PROTOCOL : HDLC_ERROR_NONE]; + + enqueue_code = (i0 != i_next) + 2*(i1 != i_next); + + if (PREDICT_FALSE (enqueue_code != 0)) + { + switch (enqueue_code) + { + case 1: + /* A B A */ + to_next[-2] = bi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i0), bi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i1), bi1); + break; + + case 3: + /* A B B or A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i0), bi0); + vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i1), bi1); + if (i0 == i1) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + i_next = i1; + next_index = vec_elt (rt->next_by_protocol, i_next); + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + hdlc_header_t * h0; + u32 i0, len0, protocol0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = (void *) (b0->data + b0->current_data); + + protocol0 = h0->protocol; + + /* Add padding bytes for OSI protocols. */ + len0 = sizeof (h0[0]); + len0 += protocol0 == clib_host_to_net_u16 (HDLC_PROTOCOL_osi); + + b0->current_data += len0; + b0->current_length -= len0; + + i0 = sparse_vec_index (rt->next_by_protocol, protocol0); + + b0->error = node->errors[i0 == SPARSE_VEC_INVALID_INDEX ? HDLC_ERROR_UNKNOWN_PROTOCOL : HDLC_ERROR_NONE]; + + /* Sent packet to wrong next? */ + if (PREDICT_FALSE (i0 != i_next)) + { + /* Return old frame; remove incorrectly enqueued packet. */ + vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); + + /* Send to correct next. */ + i_next = i0; + next_index = vec_elt (rt->next_by_protocol, i_next); + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static char * hdlc_error_strings[] = { +#define hdlc_error(n,s) s, +#include "error.def" +#undef hdlc_error +}; + +VLIB_REGISTER_NODE (hdlc_input_node) = { + .function = hdlc_input, + .name = "hdlc-input", + /* Takes a vector of packets. 
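A worked reading of the dispatch code in hdlc_input above, where i_next is the sparse index of the currently speculated next node:

    /* enqueue_code = (i0 != i_next) + 2*(i1 != i_next)
       0: both packets match the speculation  -> leave them where they were enqueued
       1: only b0 differs ("A B A")           -> b1 overwrites b0's slot; b0 is
                                                 re-enqueued to its own next node
       2: only b1 differs ("A A B")           -> back up one slot, re-enqueue b1
       3: both differ ("A B B" / "A B C")     -> back up two slots, re-enqueue both;
                                                 if i0 == i1 the speculation is
                                                 switched to their shared next node */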
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (hdlc_input_runtime_t), + + .n_errors = HDLC_N_ERROR, + .error_strings = hdlc_error_strings, + + .n_next_nodes = HDLC_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [HDLC_INPUT_NEXT_##s] = n, + foreach_hdlc_input_next +#undef _ + }, + + .format_buffer = format_hdlc_header_with_length, + .format_trace = format_hdlc_input_trace, + .unformat_buffer = unformat_hdlc_header, +}; + +static clib_error_t * hdlc_input_init (vlib_main_t * vm) +{ + hdlc_input_runtime_t * rt; + + { + clib_error_t * error = vlib_call_init_function (vm, hdlc_init); + if (error) + clib_error_report (error); + } + + hdlc_setup_node (vm, hdlc_input_node.index); + + rt = vlib_node_get_runtime_data (vm, hdlc_input_node.index); + + rt->next_by_protocol = sparse_vec_new + (/* elt bytes */ sizeof (rt->next_by_protocol[0]), + /* bits in index */ BITS (((hdlc_header_t *) 0)->protocol)); + + vec_validate (rt->sparse_index_by_next_index, HDLC_INPUT_NEXT_DROP); + vec_validate (rt->sparse_index_by_next_index, HDLC_INPUT_NEXT_PUNT); + rt->sparse_index_by_next_index[HDLC_INPUT_NEXT_DROP] + = SPARSE_VEC_INVALID_INDEX; + rt->sparse_index_by_next_index[HDLC_INPUT_NEXT_PUNT] + = SPARSE_VEC_INVALID_INDEX; + + return 0; +} + +VLIB_INIT_FUNCTION (hdlc_input_init); + +void +hdlc_register_input_protocol (vlib_main_t * vm, + hdlc_protocol_t protocol, + u32 node_index) +{ + hdlc_main_t * em = &hdlc_main; + hdlc_protocol_info_t * pi; + hdlc_input_runtime_t * rt; + u16 * n; + u32 i; + + { + clib_error_t * error = vlib_call_init_function (vm, hdlc_input_init); + if (error) + clib_error_report (error); + } + + pi = hdlc_get_protocol_info (em, protocol); + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + hdlc_input_node.index, + node_index); + + /* Setup hdlc protocol -> next index sparse vector mapping. */ + rt = vlib_node_get_runtime_data (vm, hdlc_input_node.index); + n = sparse_vec_validate (rt->next_by_protocol, clib_host_to_net_u16 (protocol)); + n[0] = pi->next_index; + + /* Rebuild next index -> sparse index inverse mapping when sparse vector + is updated. */ + vec_validate (rt->sparse_index_by_next_index, pi->next_index); + for (i = 1; i < vec_len (rt->next_by_protocol); i++) + rt->sparse_index_by_next_index[rt->next_by_protocol[i]] = i; +} diff --git a/vnet/vnet/hdlc/packet.h b/vnet/vnet/hdlc/packet.h new file mode 100644 index 00000000000..45e5496fbc8 --- /dev/null +++ b/vnet/vnet/hdlc/packet.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
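A minimal sketch, not code from this commit, of how a payload protocol typically hooks itself into hdlc-input: an init function registers its own graph node against the HDLC protocol value (defined in packet.h below), which populates the next_by_protocol sparse vector used by the dispatch loops above. The node name "ip4-input" and the function name are assumed for illustration.

#include <vlib/vlib.h>
#include <vnet/hdlc/hdlc.h>

/* Hypothetical: make ip4-input the next node for HDLC protocol 0x0800. */
static clib_error_t *
example_hdlc_ip4_register (vlib_main_t * vm)
{
  vlib_node_t * n = vlib_get_node_by_name (vm, (u8 *) "ip4-input");

  hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4, n->index);
  return 0;
}

VLIB_INIT_FUNCTION (example_hdlc_ip4_register);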
+ */ +#ifndef included_vnet_hdlc_packet_h +#define included_vnet_hdlc_packet_h + +/* + * HDLC packet format + * + * Copyright (c) 2009 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#define foreach_hdlc_protocol \ + _ (0x0800, ip4) \ + _ (0x2000, cdp) \ + _ (0x8035, slarp) \ + _ (0x8847, mpls_unicast) \ + _ (0x8848, mpls_multicast) \ + _ (0x86dd, ip6) \ + _ (0xfefe, osi) + +typedef enum { +#define _(n,f) HDLC_PROTOCOL_##f = n, + foreach_hdlc_protocol +#undef _ +} hdlc_protocol_t; + +typedef struct { + /* Set to 0x0f for unicast; 0x8f for broadcast. */ + u8 address; + + /* Always zero. */ + u8 control; + + /* Layer 3 protocol for this packet. */ + u16 protocol; + + /* Layer 3 payload. */ + u8 payload[0]; +} hdlc_header_t; + +#endif /* included_vnet_hdlc_packet_h */ diff --git a/vnet/vnet/hdlc/pg.c b/vnet/vnet/hdlc/pg.c new file mode 100644 index 00000000000..b8e67022b08 --- /dev/null +++ b/vnet/vnet/hdlc/pg.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * hdlc_pg.c: packet generator hdlc interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
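A small sketch of how the header above is typically consumed (an assumption, not from this file): the protocol field is carried in network byte order on the wire, so it is byte-swapped before comparison against the hdlc_protocol_t values.

#include <vlib/vlib.h>
#include <vnet/hdlc/hdlc.h>

/* Hypothetical helper: layer-3 protocol of an HDLC packet whose header
   starts at the buffer's current data pointer. */
static inline hdlc_protocol_t
example_hdlc_get_protocol (vlib_buffer_t * b)
{
  hdlc_header_t * h = vlib_buffer_get_current (b);
  return (hdlc_protocol_t) clib_net_to_host_u16 (h->protocol);
}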
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/hdlc/hdlc.h> + +typedef struct { + pg_edit_t address; + pg_edit_t control; + pg_edit_t protocol; +} pg_hdlc_header_t; + +static inline void +pg_hdlc_header_init (pg_hdlc_header_t * e) +{ + pg_edit_init (&e->address, hdlc_header_t, address); + pg_edit_init (&e->control, hdlc_header_t, control); + pg_edit_init (&e->protocol, hdlc_header_t, protocol); +} + +uword +unformat_pg_hdlc_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_hdlc_header_t * h; + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (hdlc_header_t), + &group_index); + pg_hdlc_header_init (h); + + pg_edit_set_fixed (&h->address, 0x0f); + pg_edit_set_fixed (&h->control, 0x00); + + error = 1; + if (! unformat (input, "%U", + unformat_pg_edit, + unformat_hdlc_protocol_net_byte_order, &h->protocol)) + goto done; + + { + hdlc_main_t * pm = &hdlc_main; + hdlc_protocol_info_t * pi = 0; + pg_node_t * pg_node = 0; + + if (h->protocol.type == PG_EDIT_FIXED) + { + u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO]; + pi = hdlc_get_protocol_info (pm, clib_net_to_host_u16 (t)); + if (pi && pi->node_index != ~0) + pg_node = pg_get_node (pi->node_index); + } + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto done; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/interface.c b/vnet/vnet/interface.c new file mode 100644 index 00000000000..ffe0b672acd --- /dev/null +++ b/vnet/vnet/interface.c @@ -0,0 +1,1051 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * interface.c: VNET interfaces/sub-interfaces + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> + +#define VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE (1 << 0) +#define VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE (1 << 1) + +static clib_error_t * +vnet_hw_interface_set_flags_helper (vnet_main_t * vnm, u32 hw_if_index, u32 flags, + u32 helper_flags); + +static clib_error_t * +vnet_sw_interface_set_flags_helper (vnet_main_t * vnm, u32 sw_if_index, u32 flags, + u32 helper_flags); + +static clib_error_t * +vnet_hw_interface_set_class_helper (vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index, u32 redistribute); + +typedef struct { + /* Either sw or hw interface index. */ + u32 sw_hw_if_index; + + /* Flags. */ + u32 flags; +} vnet_sw_hw_interface_state_t; + +static void serialize_vec_vnet_sw_hw_interface_state (serialize_main_t * m, va_list * va) +{ + vnet_sw_hw_interface_state_t * s = va_arg (*va, vnet_sw_hw_interface_state_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) { + serialize_integer (m, s[i].sw_hw_if_index, sizeof (s[i].sw_hw_if_index)); + serialize_integer (m, s[i].flags, sizeof (s[i].flags)); + } +} + +static void unserialize_vec_vnet_sw_hw_interface_state (serialize_main_t * m, va_list * va) +{ + vnet_sw_hw_interface_state_t * s = va_arg (*va, vnet_sw_hw_interface_state_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) { + unserialize_integer (m, &s[i].sw_hw_if_index, sizeof (s[i].sw_hw_if_index)); + unserialize_integer (m, &s[i].flags, sizeof (s[i].flags)); + } +} + +static void serialize_vnet_sw_hw_interface_set_flags (serialize_main_t * m, va_list * va) +{ + vnet_sw_hw_interface_state_t * s = va_arg (*va, vnet_sw_hw_interface_state_t *); + serialize (m, serialize_vec_vnet_sw_hw_interface_state, s, 1); +} + +static void unserialize_vnet_sw_interface_set_flags (serialize_main_t * m, va_list * va) +{ + CLIB_UNUSED (mc_main_t * mc) = va_arg (*va, mc_main_t *); + vnet_sw_hw_interface_state_t s; + + unserialize (m, unserialize_vec_vnet_sw_hw_interface_state, &s, 1); + + vnet_sw_interface_set_flags_helper + (vnet_get_main(), s.sw_hw_if_index, s.flags, + /* helper_flags no redistribution */ 0); +} + +static void unserialize_vnet_hw_interface_set_flags (serialize_main_t * m, va_list * va) +{ + CLIB_UNUSED (mc_main_t * mc) = va_arg (*va, mc_main_t *); + vnet_sw_hw_interface_state_t s; + + unserialize (m, unserialize_vec_vnet_sw_hw_interface_state, &s, 1); + + vnet_hw_interface_set_flags_helper + (vnet_get_main(), s.sw_hw_if_index, s.flags, + /* helper_flags no redistribution */ 0); +} + +MC_SERIALIZE_MSG (vnet_sw_interface_set_flags_msg, static) = { + .name = "vnet_sw_interface_set_flags", + .serialize = serialize_vnet_sw_hw_interface_set_flags, + .unserialize = unserialize_vnet_sw_interface_set_flags, +}; + +MC_SERIALIZE_MSG (vnet_hw_interface_set_flags_msg, static) = { + .name = "vnet_hw_interface_set_flags", + .serialize = serialize_vnet_sw_hw_interface_set_flags, + .unserialize = unserialize_vnet_hw_interface_set_flags, +}; + +void serialize_vnet_interface_state (serialize_main_t * 
m, va_list * va) +{ + vnet_main_t * vnm = va_arg (*va, vnet_main_t *); + vnet_sw_hw_interface_state_t * sts = 0, * st; + vnet_sw_interface_t * sif; + vnet_hw_interface_t * hif; + vnet_interface_main_t * im = &vnm->interface_main; + + /* Serialize hardware interface classes since they may have changed. + Must do this before sending up/down flags. */ + pool_foreach (hif, im->hw_interfaces, ({ + vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hif->hw_class_index); + serialize_cstring (m, hw_class->name); + })); + + /* Send sw/hw interface state when non-zero. */ + pool_foreach (sif, im->sw_interfaces, ({ + if (sif->flags != 0) + { + vec_add2 (sts, st, 1); + st->sw_hw_if_index = sif->sw_if_index; + st->flags = sif->flags; + } + })); + + vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state); + + if (sts) + _vec_len (sts) = 0; + + pool_foreach (hif, im->hw_interfaces, ({ + if (hif->flags != 0) + { + vec_add2 (sts, st, 1); + st->sw_hw_if_index = hif->hw_if_index; + st->flags = hif->flags; + } + })); + + vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state); + + vec_free (sts); +} + +void unserialize_vnet_interface_state (serialize_main_t * m, va_list * va) +{ + vnet_main_t * vnm = va_arg (*va, vnet_main_t *); + vnet_sw_hw_interface_state_t * sts = 0, * st; + + /* First set interface hardware class. */ + { + vnet_interface_main_t * im = &vnm->interface_main; + vnet_hw_interface_t * hif; + char * class_name; + uword * p; + clib_error_t * error; + + pool_foreach (hif, im->hw_interfaces, ({ + unserialize_cstring (m, &class_name); + p = hash_get_mem (im->hw_interface_class_by_name, class_name); + ASSERT (p != 0); + error = vnet_hw_interface_set_class_helper (vnm, hif->hw_if_index, p[0], /* redistribute */ 0); + if (error) + clib_error_report (error); + vec_free (class_name); + })); + } + + vec_unserialize (m, &sts, unserialize_vec_vnet_sw_hw_interface_state); + vec_foreach (st, sts) + vnet_sw_interface_set_flags_helper (vnm, st->sw_hw_if_index, st->flags, + /* no distribute */ 0); + vec_free (sts); + + vec_unserialize (m, &sts, unserialize_vec_vnet_sw_hw_interface_state); + vec_foreach (st, sts) + vnet_hw_interface_set_flags_helper (vnm, st->sw_hw_if_index, st->flags, + /* no distribute */ 0); + vec_free (sts); +} + +static clib_error_t * +call_elf_section_interface_callbacks (vnet_main_t * vnm, u32 if_index, + u32 flags, + _vnet_interface_function_list_elt_t *elt) +{ + clib_error_t * error = 0; + + while (elt) + { + error = elt->fp(vnm, if_index, flags); + if (error) + return error; + elt = elt->next_interface_function; + } + return error; +} + +static clib_error_t * +call_hw_interface_add_del_callbacks (vnet_main_t * vnm, u32 hw_if_index, u32 is_create) +{ + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hi->hw_class_index); + vnet_device_class_t * dev_class = vnet_get_device_class (vnm, hi->dev_class_index); + clib_error_t * error = 0; + + if (hw_class->interface_add_del_function + && (error = hw_class->interface_add_del_function (vnm, hw_if_index, is_create))) + return error; + + if (dev_class->interface_add_del_function + && (error = dev_class->interface_add_del_function (vnm, hw_if_index, is_create))) + return error; + + error = call_elf_section_interface_callbacks + (vnm, hw_if_index, is_create, vnm->hw_interface_add_del_functions); + + return error; +} + +static clib_error_t * +call_sw_interface_add_del_callbacks (vnet_main_t * vnm, u32 sw_if_index, 
u32 is_create) +{ + return call_elf_section_interface_callbacks + (vnm, sw_if_index, is_create, vnm->sw_interface_add_del_functions); +} + +#define VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE (1 << 0) +#define VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE (1 << 1) + +static clib_error_t * +vnet_hw_interface_set_flags_helper (vnet_main_t * vnm, u32 hw_if_index, u32 flags, + u32 helper_flags) +{ + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hi->hw_class_index); + vnet_device_class_t * dev_class = vnet_get_device_class (vnm, hi->dev_class_index); + vlib_main_t * vm = vnm->vlib_main; + u32 mask; + clib_error_t * error = 0; + u32 is_create = (helper_flags & VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE) != 0; + + mask = (VNET_HW_INTERFACE_FLAG_LINK_UP | VNET_HW_INTERFACE_FLAG_DUPLEX_MASK | + VNET_HW_INTERFACE_FLAG_SPEED_MASK); + flags &= mask; + + /* Call hardware interface add/del callbacks. */ + if (is_create) + call_hw_interface_add_del_callbacks (vnm, hw_if_index, is_create); + + /* Already in the desired state? */ + if (! is_create && (hi->flags & mask) == flags) + goto done; + + /* Some interface classes do not redistribute (e.g. are local). */ + if (! dev_class->redistribute) + helper_flags &= ~ VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE; + + if (vm->mc_main + && (helper_flags & VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE)) + { + vnet_sw_hw_interface_state_t s; + s.sw_hw_if_index = hw_if_index; + s.flags = flags; + mc_serialize (vm->mc_main, &vnet_hw_interface_set_flags_msg, &s); + } + + if ((hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) != + (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + { + /* Do hardware class (e.g. ethernet). */ + if (hw_class->link_up_down_function + && (error = hw_class->link_up_down_function (vnm, hw_if_index, + flags))) + goto done; + + error = call_elf_section_interface_callbacks + (vnm, hw_if_index, is_create, vnm->hw_interface_link_up_down_functions); + + if (error) + goto done; + } + + hi->flags &= ~mask; + hi->flags |= flags; + + done: + return error; +} + +static clib_error_t * +vnet_sw_interface_set_flags_helper (vnet_main_t * vnm, u32 sw_if_index, u32 flags, + u32 helper_flags) +{ + vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, sw_if_index); + vlib_main_t * vm = vnm->vlib_main; + u32 mask; + clib_error_t * error = 0; + u32 is_create = (helper_flags & VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE) != 0; + + mask = VNET_SW_INTERFACE_FLAG_ADMIN_UP | VNET_SW_INTERFACE_FLAG_PUNT; + flags &= mask; + + if (is_create) + { + error = call_sw_interface_add_del_callbacks (vnm, sw_if_index, is_create); + if (error) + goto done; + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + { + /* Notify everyone when the interface is created as admin up */ + error = call_elf_section_interface_callbacks (vnm, sw_if_index, + flags, vnm->sw_interface_admin_up_down_functions); + if (error) + goto done; + } + } + else + { + vnet_sw_interface_t * si_sup = si; + + /* Check that super interface is in correct state. */ + if (si->type == VNET_SW_INTERFACE_TYPE_SUB) + { + si_sup = vnet_get_sw_interface (vnm, si->sup_sw_if_index); + + if (flags != (si_sup->flags & mask)) + { + error = clib_error_return (0, "super-interface %U must be %U", + format_vnet_sw_interface_name, vnm, si_sup, + format_vnet_sw_interface_flags, flags); + goto done; + } + } + + /* Already in the desired state? 
*/ + if ((si->flags & mask) == flags) + goto done; + + /* Sub-interfaces of hardware interfaces that do no redistribute, + do not redistribute themselves. */ + if (si_sup->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + { + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, si_sup->hw_if_index); + vnet_device_class_t * dev_class = vnet_get_device_class (vnm, hi->dev_class_index); + if (! dev_class->redistribute) + helper_flags &= ~ VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE; + } + + if (vm->mc_main + && (helper_flags & VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE)) + { + vnet_sw_hw_interface_state_t s; + s.sw_hw_if_index = sw_if_index; + s.flags = flags; + mc_serialize (vm->mc_main, &vnet_sw_interface_set_flags_msg, &s); + } + + error = call_elf_section_interface_callbacks + (vnm, sw_if_index, flags, vnm->sw_interface_admin_up_down_functions); + + if (error) + goto done; + + if (si->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + { + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, si->hw_if_index); + vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hi->hw_class_index); + vnet_device_class_t * dev_class = vnet_get_device_class (vnm, hi->dev_class_index); + + if (dev_class->admin_up_down_function + && (error = dev_class->admin_up_down_function (vnm, si->hw_if_index, flags))) + goto done; + + if (hw_class->admin_up_down_function + && (error = hw_class->admin_up_down_function (vnm, si->hw_if_index, flags))) + goto done; + + /* Admin down implies link down. */ + if (! (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + && (hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + vnet_hw_interface_set_flags_helper (vnm, si->hw_if_index, + hi->flags &~ VNET_HW_INTERFACE_FLAG_LINK_UP, + helper_flags); + } + } + + si->flags &= ~mask; + si->flags |= flags; + + done: + return error; +} + +clib_error_t * +vnet_hw_interface_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + return vnet_hw_interface_set_flags_helper + (vnm, hw_if_index, flags, + VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE); +} + +clib_error_t * +vnet_sw_interface_set_flags (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + return vnet_sw_interface_set_flags_helper + (vnm, sw_if_index, flags, + VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE); +} + +static u32 +vnet_create_sw_interface_no_callbacks (vnet_main_t * vnm, vnet_sw_interface_t * template) +{ + vnet_interface_main_t * im = &vnm->interface_main; + vnet_sw_interface_t * sw; + u32 sw_if_index; + + pool_get (im->sw_interfaces, sw); + sw_if_index = sw - im->sw_interfaces; + + sw[0] = template[0]; + + sw->flags = 0; + sw->sw_if_index = sw_if_index; + if (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + sw->sup_sw_if_index = sw->sw_if_index; + + /* Allocate counters for this interface. 
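A usage sketch for the two public wrappers defined above (the function and variable names in the example are hypothetical, not from this commit): the software-interface call drives admin state, the hardware-interface call drives link state, and both redistribute via mc_serialize when a multicast main is configured.

#include <vnet/vnet.h>

/* Hypothetical: bring an interface admin up and report its link as up. */
static clib_error_t *
example_bring_interface_up (vnet_main_t * vnm, u32 sw_if_index, u32 hw_if_index)
{
  clib_error_t * error;

  error = vnet_sw_interface_set_flags (vnm, sw_if_index,
                                       VNET_SW_INTERFACE_FLAG_ADMIN_UP);
  if (error)
    return error;

  return vnet_hw_interface_set_flags (vnm, hw_if_index,
                                      VNET_HW_INTERFACE_FLAG_LINK_UP);
}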
*/ + { + u32 i; + + vnet_interface_counter_lock(im); + + for (i = 0; i < vec_len (im->sw_if_counters); i++) + { + vlib_validate_simple_counter (&im->sw_if_counters[i], sw_if_index); + vlib_zero_simple_counter (&im->sw_if_counters[i], sw_if_index); + } + + for (i = 0; i < vec_len (im->combined_sw_if_counters); i++) + { + vlib_validate_combined_counter (&im->combined_sw_if_counters[i], + sw_if_index); + vlib_zero_combined_counter (&im->combined_sw_if_counters[i], + sw_if_index); + } + + vnet_interface_counter_unlock(im); + } + + return sw_if_index; +} + +clib_error_t * +vnet_create_sw_interface (vnet_main_t * vnm, vnet_sw_interface_t * template, u32 * sw_if_index) +{ + clib_error_t * error; + vnet_hw_interface_t * hi; + vnet_device_class_t * dev_class; + + hi = vnet_get_sup_hw_interface (vnm, template->sup_sw_if_index); + dev_class = vnet_get_device_class (vnm, hi->dev_class_index); + + if (template->type == VNET_SW_INTERFACE_TYPE_SUB && + dev_class->subif_add_del_function) { + error = dev_class->subif_add_del_function (vnm, hi->hw_if_index, + (struct vnet_sw_interface_t *) template, 1); + if (error) + return error; + } + + *sw_if_index = vnet_create_sw_interface_no_callbacks (vnm, template); + error = vnet_sw_interface_set_flags_helper + (vnm, *sw_if_index, template->flags, + VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE); + + if (error) { + // undo the work done by vnet_create_sw_interface_no_callbacks() + vnet_interface_main_t * im = &vnm->interface_main; + vnet_sw_interface_t * sw = pool_elt_at_index (im->sw_interfaces, *sw_if_index); + pool_put (im->sw_interfaces, sw); + } + + return error; +} + +void vnet_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_interface_main_t * im = &vnm->interface_main; + vnet_sw_interface_t * sw = pool_elt_at_index (im->sw_interfaces, sw_if_index); + + /* Bring down interface in case it is up. */ + if (sw->flags != 0) + vnet_sw_interface_set_flags (vnm, sw_if_index, /* flags */ 0); + + call_sw_interface_add_del_callbacks (vnm, sw_if_index, /* is_create */ 0); + + pool_put (im->sw_interfaces, sw); +} + +static void setup_tx_node (vlib_main_t * vm, + u32 node_index, + vnet_device_class_t * dev_class) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + + n->function = dev_class->tx_function; + n->format_trace = dev_class->format_tx_trace; + vlib_register_errors (vm, node_index, + dev_class->tx_function_n_errors, + dev_class->tx_function_error_strings); +} + +static void setup_output_node (vlib_main_t * vm, + u32 node_index, + vnet_hw_interface_class_t * hw_class) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + n->format_buffer = hw_class->format_header; + n->unformat_buffer = hw_class->unformat_header; +} + +/* Register an interface instance. 
*/ +u32 +vnet_register_interface (vnet_main_t * vnm, + u32 dev_class_index, + u32 dev_instance, + u32 hw_class_index, + u32 hw_instance) +{ + vnet_interface_main_t * im = &vnm->interface_main; + vnet_hw_interface_t * hw; + vnet_device_class_t * dev_class = vnet_get_device_class (vnm, dev_class_index); + vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hw_class_index); + vlib_main_t * vm = vnm->vlib_main; + u32 hw_index; + char * tx_node_name, * output_node_name; + + pool_get (im->hw_interfaces, hw); + + hw_index = hw - im->hw_interfaces; + hw->hw_if_index = hw_index; + + if (dev_class->format_device_name) + hw->name = format (0, "%U", + dev_class->format_device_name, dev_instance); + else if (hw_class->format_interface_name) + hw->name = format (0, "%U", hw_class->format_interface_name, + dev_instance); + else + hw->name = format (0, "%s%x", hw_class->name, dev_instance); + + if (! im->hw_interface_by_name) + im->hw_interface_by_name = hash_create_vec (/* size */ 0, + sizeof (hw->name[0]), + sizeof (uword)); + + hash_set_mem (im->hw_interface_by_name, hw->name, hw_index); + + /* Make hardware interface point to software interface. */ + { + vnet_sw_interface_t sw; + + memset (&sw, 0, sizeof (sw)); + sw.type = VNET_SW_INTERFACE_TYPE_HARDWARE; + sw.hw_if_index = hw_index; + hw->sw_if_index = vnet_create_sw_interface_no_callbacks (vnm, &sw); + } + + hw->dev_class_index = dev_class_index; + hw->dev_instance = dev_instance; + hw->hw_class_index = hw_class_index; + hw->hw_instance = hw_instance; + + hw->max_rate_bits_per_sec = 0; + hw->min_packet_bytes = 0; + hw->per_packet_overhead_bytes = 0; + hw->max_l3_packet_bytes[VLIB_RX] = ~0; + hw->max_l3_packet_bytes[VLIB_TX] = ~0; + + tx_node_name = (char *) format (0, "%v-tx", hw->name); + output_node_name = (char *) format (0, "%v-output", hw->name); + + /* If we have previously deleted interface nodes, re-use them. */ + if (vec_len (im->deleted_hw_interface_nodes) > 0) + { + vnet_hw_interface_nodes_t * hn; + vnet_interface_output_runtime_t * rt; + + hn = vec_end (im->deleted_hw_interface_nodes) - 1; + + hw->tx_node_index = hn->tx_node_index; + hw->output_node_index = hn->output_node_index; + + vlib_node_rename (vm, hw->tx_node_index, "%v", tx_node_name); + vlib_node_rename (vm, hw->output_node_index, "%v", output_node_name); + + rt = vlib_node_get_runtime_data (vm, hw->output_node_index); + ASSERT (rt->is_deleted == 1); + rt->is_deleted = 0; + + _vec_len (im->deleted_hw_interface_nodes) -= 1; + } + else + { + vlib_node_registration_t r; + vnet_interface_output_runtime_t rt = { + .hw_if_index = hw_index, + .sw_if_index = hw->sw_if_index, + .dev_instance = hw->dev_instance, + .is_deleted = 0, + }; + + memset (&r, 0, sizeof (r)); + r.type = VLIB_NODE_TYPE_INTERNAL; + r.runtime_data = &rt; + r.runtime_data_bytes = sizeof (rt); + r.scalar_size = 0; + r.vector_size = sizeof (u32); + + r.flags = VLIB_NODE_FLAG_IS_OUTPUT; + r.name = tx_node_name; + r.function = dev_class->tx_function; + + hw->tx_node_index = vlib_register_node (vm, &r); + + vlib_node_add_named_next_with_slot (vm, hw->tx_node_index, + "error-drop", + VNET_INTERFACE_TX_NEXT_DROP); + + r.flags = 0; + r.name = output_node_name; + r.function = dev_class->no_flatten_output_chains ? 
+ vnet_interface_output_node_no_flatten : + vnet_interface_output_node; + r.format_trace = format_vnet_interface_output_trace; + + { + static char * e[] = { + "interface is down", + "interface is deleted", + }; + + r.n_errors = ARRAY_LEN (e); + r.error_strings = e; + } + + hw->output_node_index = vlib_register_node (vm, &r); + +#define _(sym,str) vlib_node_add_named_next_with_slot (vm, \ + hw->output_node_index, str, \ + VNET_INTERFACE_OUTPUT_NEXT_##sym); + foreach_intf_output_feat +#undef _ + + vlib_node_add_named_next_with_slot (vm, hw->output_node_index, + "error-drop", + VNET_INTERFACE_OUTPUT_NEXT_DROP); + vlib_node_add_next_with_slot (vm, hw->output_node_index, + hw->tx_node_index, + VNET_INTERFACE_OUTPUT_NEXT_TX); + } + + setup_output_node (vm, hw->output_node_index, hw_class); + setup_tx_node (vm, hw->tx_node_index, dev_class); + + /* Call all up/down callbacks with zero flags when interface is created. */ + vnet_sw_interface_set_flags_helper + (vnm, hw->sw_if_index, /* flags */ 0, + VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE); + vnet_hw_interface_set_flags_helper + (vnm, hw_index, /* flags */ 0, + VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE); + + return hw_index; +} + +void vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index) +{ + vnet_interface_main_t * im = &vnm->interface_main; + vnet_hw_interface_t * hw = vnet_get_hw_interface (vnm, hw_if_index); + vlib_main_t * vm = vnm->vlib_main; + + /* If it is up, mark it down. */ + if (hw->flags != 0) + vnet_hw_interface_set_flags (vnm, hw_if_index, /* flags */ 0); + + /* Call delete callbacks. */ + call_hw_interface_add_del_callbacks (vnm, hw_if_index, /* is_create */ 0); + + /* Delete software interface corresponding to hardware interface. */ + vnet_delete_sw_interface (vnm, hw->sw_if_index); + + /* Delete any sub-interfaces. */ + { + u32 id, sw_if_index; + hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id, ({ + vnet_delete_sw_interface (vnm, sw_if_index); + })); + } + + { + vnet_hw_interface_nodes_t * dn; + vnet_interface_output_runtime_t * rt = vlib_node_get_runtime_data (vm, hw->output_node_index); + + /* Mark node runtime as deleted so output node (if called) will drop packets. 
*/ + rt->is_deleted = 1; + + vlib_node_rename (vm, hw->output_node_index, "interface-%d-output-deleted", hw_if_index); + vlib_node_rename (vm, hw->tx_node_index, "interface-%d-tx-deleted", hw_if_index); + vec_add2 (im->deleted_hw_interface_nodes, dn, 1); + dn->tx_node_index = hw->tx_node_index; + dn->output_node_index = hw->output_node_index; + } + + hash_unset_mem (im->hw_interface_by_name, hw->name); + vec_free (hw->name); + + pool_put (im->hw_interfaces, hw); +} + +static void serialize_vnet_hw_interface_set_class (serialize_main_t * m, va_list * va) +{ + u32 hw_if_index = va_arg (*va, u32); + char * hw_class_name = va_arg (*va, char *); + serialize_integer (m, hw_if_index, sizeof (hw_if_index)); + serialize_cstring (m, hw_class_name); +} + +static void unserialize_vnet_hw_interface_set_class (serialize_main_t * m, va_list * va) +{ + CLIB_UNUSED (mc_main_t * mc) = va_arg (*va, mc_main_t *); + vnet_main_t * vnm = vnet_get_main(); + u32 hw_if_index; + char * hw_class_name; + uword * p; + clib_error_t * error; + + unserialize_integer (m, &hw_if_index, sizeof (hw_if_index)); + unserialize_cstring (m, &hw_class_name); + p = hash_get (vnm->interface_main.hw_interface_class_by_name, hw_class_name); + ASSERT (p != 0); + error = vnet_hw_interface_set_class_helper (vnm, hw_if_index, p[0], /* redistribute */ 0); + if (error) + clib_error_report (error); +} + +MC_SERIALIZE_MSG (vnet_hw_interface_set_class_msg, static) = { + .name = "vnet_hw_interface_set_class", + .serialize = serialize_vnet_hw_interface_set_class, + .unserialize = unserialize_vnet_hw_interface_set_class, +}; + +void vnet_hw_interface_init_for_class (vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index, u32 hw_instance) +{ + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + vnet_hw_interface_class_t * hc = vnet_get_hw_interface_class (vnm, hw_class_index); + + hi->hw_class_index = hw_class_index; + hi->hw_instance = hw_instance; + setup_output_node (vnm->vlib_main, hi->output_node_index, hc); +} + +static clib_error_t * +vnet_hw_interface_set_class_helper (vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index, u32 redistribute) +{ + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, hi->sw_if_index); + vnet_hw_interface_class_t * old_class = vnet_get_hw_interface_class (vnm, hi->hw_class_index); + vnet_hw_interface_class_t * new_class = vnet_get_hw_interface_class (vnm, hw_class_index); + vnet_device_class_t * dev_class = vnet_get_device_class (vnm, hi->dev_class_index); + clib_error_t * error = 0; + + /* New class equals old class? Nothing to do. */ + if (hi->hw_class_index == hw_class_index) + return 0; + + /* No need (and incorrect since admin up flag may be set) to do error checking when + receiving unserialize message. */ + if (redistribute) + { + if (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + return clib_error_return (0, "%v must be admin down to change class from %s to %s", + hi->name, old_class->name, new_class->name); + + /* Make sure interface supports given class. */ + if ((new_class->is_valid_class_for_interface + && ! new_class->is_valid_class_for_interface (vnm, hw_if_index, hw_class_index)) + || (dev_class ->is_valid_class_for_interface + && ! 
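A lifecycle sketch, assuming typical driver usage rather than code from this commit: vnet_register_interface allocates the hardware interface, its companion software interface and the per-interface tx/output graph nodes, while vnet_delete_hw_interface undoes all of that and parks the nodes for reuse.

#include <vnet/vnet.h>

/* Hypothetical driver flow: create a hw interface, later tear it down. */
static void
example_interface_lifecycle (vnet_main_t * vnm,
                             u32 dev_class_index, u32 hw_class_index)
{
  u32 hw_if_index;

  /* Also creates the VNET_SW_INTERFACE_TYPE_HARDWARE software interface. */
  hw_if_index = vnet_register_interface (vnm,
                                         dev_class_index, /* dev_instance */ 0,
                                         hw_class_index, /* hw_instance */ 0);

  /* ... device operates ... */

  /* Runs the add/del callbacks with is_create = 0 and recycles the
     -tx / -output graph nodes. */
  vnet_delete_hw_interface (vnm, hw_if_index);
}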
dev_class->is_valid_class_for_interface (vnm, hw_if_index, hw_class_index))) + return clib_error_return (0, "%v class cannot be changed from %s to %s", + hi->name, old_class->name, new_class->name); + + if (vnm->vlib_main->mc_main) + { + mc_serialize (vnm->vlib_main->mc_main, &vnet_hw_interface_set_class_msg, hw_if_index, new_class->name); + return 0; + } + } + + if (old_class->hw_class_change) + old_class->hw_class_change (vnm, hw_if_index, old_class->index, new_class->index); + + vnet_hw_interface_init_for_class (vnm, hw_if_index, new_class->index, /* instance */ ~0); + + if (new_class->hw_class_change) + new_class->hw_class_change (vnm, hw_if_index, old_class->index, new_class->index); + + if (dev_class->hw_class_change) + dev_class->hw_class_change (vnm, hw_if_index, new_class->index); + + return error; +} + +clib_error_t * +vnet_hw_interface_set_class (vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index) +{ return vnet_hw_interface_set_class_helper (vnm, hw_if_index, hw_class_index, /* redistribute */ 1); } + +static int +vnet_hw_interface_rx_redirect_to_node_helper (vnet_main_t * vnm, + u32 hw_if_index, + u32 node_index, + u32 redistribute) +{ + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + vnet_device_class_t * dev_class = vnet_get_device_class + (vnm, hi->dev_class_index); + + if (redistribute) + { + /* $$$$ fixme someday maybe */ + ASSERT(vnm->vlib_main->mc_main == 0); + } + if (dev_class->rx_redirect_to_node) + { + dev_class->rx_redirect_to_node (vnm, hw_if_index, node_index); + return 0; + } + + return VNET_API_ERROR_UNIMPLEMENTED; +} + +int vnet_hw_interface_rx_redirect_to_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ return vnet_hw_interface_rx_redirect_to_node_helper (vnm, hw_if_index, + node_index, + 1 /* redistribute */); } + +word +vnet_sw_interface_compare (vnet_main_t * vnm, + uword sw_if_index0, uword sw_if_index1) +{ + vnet_sw_interface_t * sup0 = vnet_get_sup_sw_interface (vnm, sw_if_index0); + vnet_sw_interface_t * sup1 = vnet_get_sup_sw_interface (vnm, sw_if_index1); + vnet_hw_interface_t * h0 = vnet_get_hw_interface (vnm, sup0->hw_if_index); + vnet_hw_interface_t * h1 = vnet_get_hw_interface (vnm, sup1->hw_if_index); + + if (h0 != h1) + return vec_cmp (h0->name, h1->name); + return (word) h0->hw_instance - (word) h1->hw_instance; +} + +word +vnet_hw_interface_compare (vnet_main_t * vnm, + uword hw_if_index0, uword hw_if_index1) +{ + vnet_hw_interface_t * h0 = vnet_get_hw_interface (vnm, hw_if_index0); + vnet_hw_interface_t * h1 = vnet_get_hw_interface (vnm, hw_if_index1); + + if (h0 != h1) + return vec_cmp (h0->name, h1->name); + return (word) h0->hw_instance - (word) h1->hw_instance; +} + +clib_error_t * +vnet_interface_init (vlib_main_t * vm) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + vlib_buffer_t * b = 0; + vnet_buffer_opaque_t * o = 0; + + /* + * Keep people from shooting themselves in the foot. 
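An illustrative call to the redirect helper above (the sink node name is a hypothetical example): the device class steers all received packets from the interface into the chosen node, and the call returns VNET_API_ERROR_UNIMPLEMENTED when the class has no rx_redirect_to_node hook.

#include <vnet/vnet.h>

/* Hypothetical: redirect rx traffic of hw_if_index to an "example-rx-sink" node. */
static int
example_redirect_rx (vlib_main_t * vm, vnet_main_t * vnm, u32 hw_if_index)
{
  vlib_node_t * sink = vlib_get_node_by_name (vm, (u8 *) "example-rx-sink");

  return vnet_hw_interface_rx_redirect_to_node (vnm, hw_if_index, sink->index);
}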
+ */ + if (sizeof(b->opaque) != sizeof (vnet_buffer_opaque_t)) + { +#define _(a) if (sizeof(o->a) > sizeof (o->unused)) \ + clib_warning \ + ("FATAL: size of opaque union subtype %s is %d (max %d)", \ + #a, sizeof(o->a), sizeof (o->unused)); + foreach_buffer_opaque_union_subtype; +#undef _ + + return clib_error_return + (0, "FATAL: size of vlib buffer opaque %d, size of vnet opaque %d", + sizeof(b->opaque), sizeof (vnet_buffer_opaque_t)); + } + + im->sw_if_counter_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + im->sw_if_counter_lock[0] = 1; /* should be no need */ + + vec_validate (im->sw_if_counters, + VNET_N_SIMPLE_INTERFACE_COUNTER - 1); + im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP].name = "drops"; + im->sw_if_counters[VNET_INTERFACE_COUNTER_PUNT].name = "punts"; + im->sw_if_counters[VNET_INTERFACE_COUNTER_IP4].name = "ip4"; + im->sw_if_counters[VNET_INTERFACE_COUNTER_IP6].name = "ip6"; + im->sw_if_counters[VNET_INTERFACE_COUNTER_RX_NO_BUF].name = "rx-no-buf"; + im->sw_if_counters[VNET_INTERFACE_COUNTER_RX_MISS].name = "rx-miss"; + im->sw_if_counters[VNET_INTERFACE_COUNTER_RX_ERROR].name = "rx-error"; + im->sw_if_counters[VNET_INTERFACE_COUNTER_TX_ERROR].name = "tx-error"; + + vec_validate (im->combined_sw_if_counters, + VNET_N_COMBINED_INTERFACE_COUNTER - 1); + im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX].name = "rx"; + im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX].name = "tx"; + + im->sw_if_counter_lock[0] = 0; + + im->device_class_by_name = hash_create_string (/* size */ 0, + sizeof (uword)); + { + vnet_device_class_t * c; + + c = vnm->device_class_registrations; + + while (c) + { + c->index = vec_len (im->device_classes); + hash_set_mem (im->device_class_by_name, c->name, c->index); + vec_add1 (im->device_classes, c[0]); + c = c->next_class_registration; + } + } + + im->hw_interface_class_by_name = hash_create_string (/* size */ 0, + sizeof (uword)); + + im->sw_if_index_by_sup_and_sub = hash_create_mem (0, sizeof(u64), + sizeof (uword)); + { + vnet_hw_interface_class_t * c; + + c = vnm->hw_interface_class_registrations; + + while (c) + { + c->index = vec_len (im->hw_interface_classes); + hash_set_mem (im->hw_interface_class_by_name, c->name, c->index); + vec_add1 (im->hw_interface_classes, c[0]); + c = c->next_class_registration; + } + } + + { + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, vnet_interface_cli_init))) + return error; + + return error; + } +} + +VLIB_INIT_FUNCTION (vnet_interface_init); + +/* Kludge to renumber interface names [only!] 
*/ +int vnet_interface_name_renumber (u32 sw_if_index, u32 new_show_dev_instance) +{ + int rv; + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + + vnet_device_class_t * dev_class = vnet_get_device_class + (vnm, hi->dev_class_index); + + if (dev_class->name_renumber == 0 || dev_class->format_device_name == 0) + return VNET_API_ERROR_UNIMPLEMENTED; + + rv = dev_class->name_renumber (hi, new_show_dev_instance); + + if (rv) + return rv; + + hash_unset_mem (im->hw_interface_by_name, hi->name); + vec_free (hi->name); + /* Use the mapping we set up to call it Ishmael */ + hi->name = format (0, "%U", dev_class->format_device_name, + hi->dev_instance); + + hash_set_mem (im->hw_interface_by_name, hi->name, hi->hw_if_index); + return rv; +} + +int vnet_interface_add_del_feature(vnet_main_t * vnm, + vlib_main_t *vm, + u32 sw_if_index, + intf_output_feat_t feature, + int is_add) +{ + vnet_sw_interface_t * sw; + + sw = vnet_get_sw_interface(vnm, sw_if_index); + + if (is_add) { + + sw->output_feature_bitmap |= (1 << feature); + sw->output_feature_bitmap |= (1<< INTF_OUTPUT_FEAT_DONE); + + } else { /* delete */ + + sw->output_feature_bitmap &= ~(1<<feature); + if (sw->output_feature_bitmap == (1 << INTF_OUTPUT_FEAT_DONE)) + sw->output_feature_bitmap = 0; + + } + return 0; +} + diff --git a/vnet/vnet/interface.h b/vnet/vnet/interface.h new file mode 100644 index 00000000000..f441f9e1027 --- /dev/null +++ b/vnet/vnet/interface.h @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * interface.h: VNET interfaces/sub-interfaces + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_vnet_interface_h +#define included_vnet_interface_h + +#include <vnet/unix/pcap.h> + +struct vnet_main_t; +struct vnet_hw_interface_t; +struct vnet_sw_interface_t; + +/* Interface up/down callback. */ +typedef clib_error_t * (vnet_interface_function_t) + (struct vnet_main_t * vnm, u32 if_index, u32 flags); + +/* Sub-interface add/del callback. */ +typedef clib_error_t * (vnet_subif_add_del_function_t) + (struct vnet_main_t * vnm, u32 if_index, + struct vnet_sw_interface_t * template, + int is_add); + +typedef struct _vnet_interface_function_list_elt { + struct _vnet_interface_function_list_elt * next_interface_function; + clib_error_t * (*fp) (struct vnet_main_t * vnm, u32 if_index, u32 flags); +} _vnet_interface_function_list_elt_t; + +#define _VNET_INTERFACE_FUNCTION_DECL(f,tag) \ + \ +static void __vnet_interface_function_init_##tag##_##f (void) \ + __attribute__((__constructor__)) ; \ + \ +static void __vnet_interface_function_init_##tag##_##f (void) \ +{ \ + vnet_main_t * vnm = vnet_get_main(); \ + static _vnet_interface_function_list_elt_t init_function; \ + init_function.next_interface_function = vnm->tag##_functions; \ + vnm->tag##_functions = &init_function; \ + init_function.fp = (void *) &f; \ +} + +#define VNET_HW_INTERFACE_ADD_DEL_FUNCTION(f) \ + _VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_add_del) +#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(f) \ + _VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_link_up_down) +#define VNET_SW_INTERFACE_ADD_DEL_FUNCTION(f) \ + _VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_add_del) +#define VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(f) \ + _VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_admin_up_down) + +/* A class of hardware interface devices. */ +typedef struct _vnet_device_class { + /* Index into main vector. */ + u32 index; + + /* Device name (e.g. "FOOBAR 1234a"). */ + char * name; + + /* Function to call when hardware interface is added/deleted. */ + vnet_interface_function_t * interface_add_del_function; + + /* Function to bring device administratively up/down. */ + vnet_interface_function_t * admin_up_down_function; + + /* Function to call when sub-interface is added/deleted */ + vnet_subif_add_del_function_t * subif_add_del_function; + + /* Redistribute flag changes/existence of this interface class. */ + u32 redistribute; + + /* Transmit function. */ + vlib_node_function_t * tx_function; + + /* Error strings indexed by error code for this node. */ + char ** tx_function_error_strings; + + /* Number of error codes used by this node. */ + u32 tx_function_n_errors; + + /* Renumber device name [only!] support, a control-plane kludge */ + int (*name_renumber) (struct vnet_hw_interface_t * hi, u32 new_dev_instance); + + /* Format device instance as name. */ + format_function_t * format_device_name; + + /* Parse function for device name. */ + unformat_function_t * unformat_device_name; + + /* Format device verbosely for this class. */ + format_function_t * format_device; + + /* Trace buffer format for TX function. */ + format_function_t * format_tx_trace; + + /* Function to clear hardware counters for device. */ + void (* clear_counters) (u32 dev_class_instance); + + uword (* is_valid_class_for_interface) (struct vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index); + + /* Called when hardware class of an interface changes. 
*/ + void ( * hw_class_change) (struct vnet_main_t * vnm, + u32 hw_if_index, + u32 new_hw_class_index); + + /* Called to redirect traffic from a specific interface instance */ + void (* rx_redirect_to_node) (struct vnet_main_t * vnm, + u32 hw_if_index, + u32 node_index); + + /* Link-list of all device classes set up by constructors created below */ + struct _vnet_device_class * next_class_registration; + + /* Do not splice vnet_interface_output_node into TX path */ + u8 no_flatten_output_chains; + +} vnet_device_class_t; + +#define VNET_DEVICE_CLASS(x,...) \ + __VA_ARGS__ vnet_device_class_t x; \ +static void __vnet_add_device_class_registration_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vnet_add_device_class_registration_##x (void) \ +{ \ + vnet_main_t * vnm = vnet_get_main(); \ + x.next_class_registration = vnm->device_class_registrations; \ + vnm->device_class_registrations = &x; \ +} \ +__VA_ARGS__ vnet_device_class_t x + +/* Layer-2 (e.g. Ethernet) interface class. */ +typedef struct _vnet_hw_interface_class { + /* Index into main vector. */ + u32 index; + + /* Class name (e.g. "Ethernet"). */ + char * name; + + /* Function to call when hardware interface is added/deleted. */ + vnet_interface_function_t * interface_add_del_function; + + /* Function to bring interface administratively up/down. */ + vnet_interface_function_t * admin_up_down_function; + + /* Function to call when link state changes. */ + vnet_interface_function_t * link_up_down_function; + + /* Format function to display interface name. */ + format_function_t * format_interface_name; + + /* Format function to display interface address. */ + format_function_t * format_address; + + /* Format packet header for this interface class. */ + format_function_t * format_header; + + /* Format device verbosely for this class. */ + format_function_t * format_device; + + /* Parser for hardware (e.g. ethernet) address. */ + unformat_function_t * unformat_hw_address; + + /* Parser for packet header for e.g. rewrite string. */ + unformat_function_t * unformat_header; + + /* Forms adjacency for given l3 packet type and destination address. + Returns number of bytes in adjacency. */ + uword (* set_rewrite) (struct vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_packet_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes); + + uword (* is_valid_class_for_interface) (struct vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index); + + /* Called when hw interface class is changed and old hardware instance + may want to be deleted. */ + void (* hw_class_change) (struct vnet_main_t * vnm, u32 hw_if_index, u32 old_class_index, u32 new_class_index); + + /* List of hw interface classes, built by constructors */ + struct _vnet_hw_interface_class * next_class_registration; + +} vnet_hw_interface_class_t; + +#define VNET_HW_INTERFACE_CLASS(x,...) \ + __VA_ARGS__ vnet_hw_interface_class_t x; \ +static void __vnet_add_hw_interface_class_registration_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vnet_add_hw_interface_class_registration_##x (void) \ +{ \ + vnet_main_t * vnm = vnet_get_main(); \ + x.next_class_registration = vnm->hw_interface_class_registrations; \ + vnm->hw_interface_class_registrations = &x; \ +} \ +__VA_ARGS__ vnet_hw_interface_class_t x + +/* Hardware-interface. This corresponds to a physical wire + that packets flow over. */ +typedef struct vnet_hw_interface_t { + /* Interface name. */ + u8 * name; + + u32 flags; + /* Hardware link state is up. 
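A hypothetical registration sketch for the constructor-based macro above (all names and behaviour are assumptions, not part of this header): declaring a device class with a transmit function, in the designated-initializer style drivers typically use.

#include <vnet/vnet.h>

/* Hypothetical no-op transmit function and its device class. */
static uword
example_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  /* A real driver would hand frame->n_vectors buffers to the hardware. */
  return frame->n_vectors;
}

VNET_DEVICE_CLASS (example_device_class) = {
  .name = "example",
  .tx_function = example_tx,
};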
*/ +#define VNET_HW_INTERFACE_FLAG_LINK_UP (1 << 0) + /* Hardware duplex state */ +#define VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT 1 +#define VNET_HW_INTERFACE_FLAG_HALF_DUPLEX (1 << 1) +#define VNET_HW_INTERFACE_FLAG_FULL_DUPLEX (1 << 2) +#define VNET_HW_INTERFACE_FLAG_DUPLEX_MASK \ + (VNET_HW_INTERFACE_FLAG_HALF_DUPLEX | \ + VNET_HW_INTERFACE_FLAG_FULL_DUPLEX) + + /* Hardware link speed */ +#define VNET_HW_INTERFACE_FLAG_SPEED_SHIFT 3 +#define VNET_HW_INTERFACE_FLAG_SPEED_10M (1 << 3) +#define VNET_HW_INTERFACE_FLAG_SPEED_100M (1 << 4) +#define VNET_HW_INTERFACE_FLAG_SPEED_1G (1 << 5) +#define VNET_HW_INTERFACE_FLAG_SPEED_10G (1 << 6) +#define VNET_HW_INTERFACE_FLAG_SPEED_40G (1 << 7) +#define VNET_HW_INTERFACE_FLAG_SPEED_100G (1 << 8) +#define VNET_HW_INTERFACE_FLAG_SPEED_MASK \ + (VNET_HW_INTERFACE_FLAG_SPEED_10M | \ + VNET_HW_INTERFACE_FLAG_SPEED_100M | \ + VNET_HW_INTERFACE_FLAG_SPEED_1G | \ + VNET_HW_INTERFACE_FLAG_SPEED_10G | \ + VNET_HW_INTERFACE_FLAG_SPEED_40G | \ + VNET_HW_INTERFACE_FLAG_SPEED_100G) + + /* Hardware address as vector. Zero (e.g. zero-length vector) if no + address for this class (e.g. PPP). */ + u8 * hw_address; + + /* Interface is up as far as software is concerned. */ + /* NAME.{output,tx} nodes for this interface. */ + u32 output_node_index, tx_node_index; + + /* (dev_class, dev_instance) uniquely identifies hw interface. */ + u32 dev_class_index; + u32 dev_instance; + + /* (hw_class, hw_instance) uniquely identifies hw interface. */ + u32 hw_class_index; + u32 hw_instance; + + /* Hardware index for this hardware interface. */ + u32 hw_if_index; + + /* Software index for this hardware interface. */ + u32 sw_if_index; + + /* Maximum transmit rate for this interface in bits/sec. */ + f64 max_rate_bits_per_sec; + + /* Smallest packet size for this interface. */ + u32 min_packet_bytes; + + /* Largest packet size for this interface. */ + u32 max_packet_bytes; + + /* Number of extra bytes that go on the wire. + Packet length on wire + = max (length + per_packet_overhead_bytes, min_packet_bytes). */ + u32 per_packet_overhead_bytes; + + /* Receive and transmit layer 3 packet size limits (MRU/MTU). */ + u32 max_l3_packet_bytes[VLIB_N_RX_TX]; + + /* Hash table mapping sub interface id to sw_if_index. */ + uword * sub_interface_sw_if_index_by_id; + + /* Count of number of L2 subinterfaces */ + u32 l2_if_count; +} vnet_hw_interface_t; + +typedef enum { + /* A hw interface. */ + VNET_SW_INTERFACE_TYPE_HARDWARE, + + /* A sub-interface. */ + VNET_SW_INTERFACE_TYPE_SUB, +} vnet_sw_interface_type_t; + +typedef struct { + // Subinterface ID. A number 0-N to uniquely identify this subinterface under the + // main (parent?) interface + u32 id; + + // Classification data. Used to associate packet header with subinterface. + struct { + u16 outer_vlan_id; + u16 inner_vlan_id; + union { + u16 raw_flags; + struct { + u16 no_tags:1; + u16 one_tag:1; + u16 two_tags:1; + u16 dot1ad:1; // 0 = dot1q, 1=dot1ad + u16 exact_match:1; + u16 default_sub:1; + u16 outer_vlan_id_any:1; + u16 inner_vlan_id_any:1; + } flags; + }; + } eth; +} vnet_sub_interface_t; + +/* Software-interface. This corresponds to a Ethernet VLAN, ATM vc, a + tunnel, etc. Configuration (e.g. IP address) gets attached to + software interface. */ +typedef struct { + vnet_sw_interface_type_t type : 16; + + u16 flags; + /* Interface is "up" meaning adminstratively up. + Up in the sense of link state being up is maintained by hardware interface. 
*/ +#define VNET_SW_INTERFACE_FLAG_ADMIN_UP (1 << 0) + + /* Interface is disabled for forwarding: punt all traffic to slow-path. */ +#define VNET_SW_INTERFACE_FLAG_PUNT (1 << 1) + +#define VNET_SW_INTERFACE_FLAG_PROXY_ARP (1 << 2) + +#define VNET_SW_INTERFACE_FLAG_UNNUMBERED (1 << 3) + + /* Index for this interface. */ + u32 sw_if_index; + + /* Software interface index of super-interface; + equal to sw_if_index if this interface is not a + sub-interface. */ + u32 sup_sw_if_index; + + /* this swif is unnumbered, use addresses on unnumbered_sw_if_index... */ + u32 unnumbered_sw_if_index; + + u32 link_speed; + + u32 output_feature_bitmap; + + union { + /* VNET_SW_INTERFACE_TYPE_HARDWARE. */ + u32 hw_if_index; + + /* VNET_SW_INTERFACE_TYPE_SUB. */ + vnet_sub_interface_t sub; + + /* SW interfaces are sorted by type and key. */ + // u32 sort_key; + }; +} vnet_sw_interface_t; + +typedef enum { + /* Simple counters. */ + VNET_INTERFACE_COUNTER_DROP = 0, + VNET_INTERFACE_COUNTER_PUNT = 1, + VNET_INTERFACE_COUNTER_IP4 = 2, + VNET_INTERFACE_COUNTER_IP6 = 3, + VNET_INTERFACE_COUNTER_RX_NO_BUF = 4, + VNET_INTERFACE_COUNTER_RX_MISS = 5, + VNET_INTERFACE_COUNTER_RX_ERROR = 6, + VNET_INTERFACE_COUNTER_TX_ERROR = 7, + VNET_N_SIMPLE_INTERFACE_COUNTER = 8, + /* Combined counters. */ + VNET_INTERFACE_COUNTER_RX = 0, + VNET_INTERFACE_COUNTER_TX = 1, + VNET_N_COMBINED_INTERFACE_COUNTER = 2, +} vnet_interface_counter_type_t; + +typedef struct { + u32 output_node_index; + u32 tx_node_index; +} vnet_hw_interface_nodes_t; + +typedef struct { + /* Hardware interfaces. */ + vnet_hw_interface_t * hw_interfaces; + + /* Hash table mapping HW interface name to index. */ + uword * hw_interface_by_name; + + /* Vectors if hardware interface classes and device classes. */ + vnet_hw_interface_class_t * hw_interface_classes; + vnet_device_class_t * device_classes; + + /* Hash table mapping name to hw interface/device class. */ + uword * hw_interface_class_by_name; + uword * device_class_by_name; + + /* Software interfaces. */ + vnet_sw_interface_t * sw_interfaces; + + /* Hash table mapping sub intfc sw_if_index by sup sw_if_index and sub id */ + uword * sw_if_index_by_sup_and_sub; + + /* Software interface counters both simple and combined + packet and byte counters. 
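A sketch of how the sub-interface template above is typically filled in before calling vnet_create_sw_interface from interface.c (the helper name and chosen defaults are assumptions): an exact-match dot1q sub-interface carrying one outer VLAN tag.

#include <vnet/vnet.h>

/* Hypothetical: create a dot1q sub-interface under sup_sw_if_index. */
static clib_error_t *
example_create_dot1q_subif (vnet_main_t * vnm, u32 sup_sw_if_index,
                            u32 vlan_id, u32 * sw_if_index)
{
  vnet_sw_interface_t template;

  memset (&template, 0, sizeof (template));
  template.type = VNET_SW_INTERFACE_TYPE_SUB;
  template.sup_sw_if_index = sup_sw_if_index;
  template.sub.id = vlan_id;
  template.sub.eth.outer_vlan_id = vlan_id;
  template.sub.eth.flags.one_tag = 1;     /* single 802.1q tag, dot1ad = 0 */
  template.sub.eth.flags.exact_match = 1;

  return vnet_create_sw_interface (vnm, &template, sw_if_index);
}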
*/ + volatile u32 *sw_if_counter_lock; + vlib_simple_counter_main_t * sw_if_counters; + vlib_combined_counter_main_t * combined_sw_if_counters; + + vnet_hw_interface_nodes_t * deleted_hw_interface_nodes; + + /* pcap drop tracing */ + int drop_pcap_enable; + pcap_main_t pcap_main; + u8 * pcap_filename; + u32 pcap_sw_if_index; + u32 pcap_pkts_to_capture; + uword * pcap_drop_filter_hash; + +} vnet_interface_main_t; + +static inline void vnet_interface_counter_lock (vnet_interface_main_t *im) +{ + if (im->sw_if_counter_lock) + while (__sync_lock_test_and_set (im->sw_if_counter_lock, 1)) + /* zzzz */ ; +} +static inline void vnet_interface_counter_unlock (vnet_interface_main_t *im) +{ + if (im->sw_if_counter_lock) + *im->sw_if_counter_lock = 0; +} + +void vnet_pcap_drop_trace_filter_add_del (u32 error_index, int is_add); + +int vnet_interface_name_renumber (u32 sw_if_index, u32 new_show_dev_instance); + + +/* + * Output features + */ + +#define foreach_intf_output_feat \ + _(IPSEC, "ipsec-output") + +// Feature bitmap positions +typedef enum { +#define _(sym,str) INTF_OUTPUT_FEAT_##sym, + foreach_intf_output_feat +#undef _ + INTF_OUTPUT_N_FEAT, +} intf_output_feat_t; + +/* flag that we are done with feature path */ +#define INTF_OUTPUT_FEAT_DONE INTF_OUTPUT_N_FEAT + +int vnet_interface_add_del_feature(struct vnet_main_t * vnm, vlib_main_t * vm, + u32 sw_if_index, + intf_output_feat_t feature, int is_add); + +#endif /* included_vnet_interface_h */ diff --git a/vnet/vnet/interface_cli.c b/vnet/vnet/interface_cli.c new file mode 100644 index 00000000000..8d986527fc6 --- /dev/null +++ b/vnet/vnet/interface_cli.c @@ -0,0 +1,769 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * interface_cli.c: interface CLI + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
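A short usage sketch for the counter lock defined above (assumed, not part of this header; the counter-read call is also an assumption about the vlib counter API): accesses to the per-interface counter vectors are bracketed with the lock/unlock pair, as interface.c does when validating counters for a new interface.

#include <vnet/vnet.h>

/* Hypothetical: read the combined RX counter for one software interface. */
static void
example_read_rx_counter (vnet_interface_main_t * im, u32 sw_if_index,
                         vlib_counter_t * result)
{
  vnet_interface_counter_lock (im);
  vlib_get_combined_counter (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
                             sw_if_index, result);
  vnet_interface_counter_unlock (im);
}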
+ */ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> + +static int compare_interface_names (void *a1, void *a2) +{ + u32 * hi1 = a1; + u32 * hi2 = a2; + + return vnet_hw_interface_compare (vnet_get_main(), *hi1, *hi2); +} + +static clib_error_t * +show_or_clear_hw_interfaces (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + vnet_hw_interface_t * hi; + u32 hw_if_index, * hw_if_indices = 0; + int i, verbose = 1, is_show; + + is_show = strstr (cmd->path, "show") != 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* See if user wants to show a specific interface. */ + if (unformat (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + { + vec_add1 (hw_if_indices, hw_if_index); + /* Implies verbose. */ + verbose = 1; + } + + else if (unformat (input, "verbose")) + verbose = 1; + + else if (unformat (input, "detail")) + verbose = 2; + + else if (unformat (input, "brief")) + verbose = 0; + + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + /* Gather interfaces. */ + if (vec_len (hw_if_indices) == 0) + pool_foreach (hi, im->hw_interfaces, + vec_add1 (hw_if_indices, hi - im->hw_interfaces)); + + if (is_show) + { + /* Sort by name. */ + vec_sort_with_function (hw_if_indices, compare_interface_names); + + vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm, 0, verbose); + for (i = 0; i < vec_len (hw_if_indices); i++) + { + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm, hi, verbose); + } + } + else + { + for (i = 0; i < vec_len (hw_if_indices); i++) + { + vnet_device_class_t * dc; + + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + dc = vec_elt_at_index (im->device_classes, hi->dev_class_index); + + if (dc->clear_counters) + dc->clear_counters (hi->dev_instance); + } + } + + done: + vec_free (hw_if_indices); + return error; +} + +VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = { + .path = "show hardware-interfaces", + .short_help = "show hardware-interfaces [verbose|brief] [<if-name1> <if-name2> ...]", + .function = show_or_clear_hw_interfaces, +}; + +VLIB_CLI_COMMAND (clear_hw_interface_counters_command, static) = { + .path = "clear hardware-interfaces", + .short_help = "Clear hardware interfaces statistics", + .function = show_or_clear_hw_interfaces, +}; + +static int sw_interface_name_compare (void *a1, void *a2) +{ + vnet_sw_interface_t *si1 = a1; + vnet_sw_interface_t *si2 = a2; + + return vnet_sw_interface_compare (vnet_get_main(), + si1->sw_if_index, si2->sw_if_index); +} + +static clib_error_t * +show_sw_interfaces (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + vnet_sw_interface_t * si, * sorted_sis = 0; + u8 show_addresses = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + u32 sw_if_index; + + /* See if user wants to show specific interface */ + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + si = pool_elt_at_index (im->sw_interfaces, sw_if_index); + vec_add1 (sorted_sis, si[0]); + } + + else if (unformat (input, "address") || unformat (input, "addr")) + show_addresses = 1; + + else + { + error = clib_error_return (0, "unknown input `%U'", + 
format_unformat_error, input); + goto done; + } + } + + if (!show_addresses) + vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, 0); + + if (vec_len (sorted_sis) == 0) /* Get all interfaces */ + { + /* Gather interfaces. */ + sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); + _vec_len (sorted_sis) = 0; + pool_foreach (si, im->sw_interfaces, ({ vec_add1 (sorted_sis, si[0]); })); + + /* Sort by name. */ + vec_sort_with_function (sorted_sis, sw_interface_name_compare); + } + + if (show_addresses) + { + vec_foreach (si, sorted_sis) + { + l2input_main_t * l2m = &l2input_main; + ip4_main_t * im4 = &ip4_main; + ip6_main_t * im6 = &ip6_main; + ip_lookup_main_t * lm4 = &im4->lookup_main; + ip_lookup_main_t * lm6 = &im6->lookup_main; + ip_interface_address_t * ia = 0; + ip4_address_t * r4; + ip6_address_t * r6; + u32 fib_index4 = 0, fib_index6 = 0; + ip4_fib_t * fib4; + ip6_fib_t * fib6; + l2_input_config_t * config; + + if (vec_len (im4->fib_index_by_sw_if_index) > si->sw_if_index) + fib_index4 = vec_elt (im4->fib_index_by_sw_if_index, + si->sw_if_index); + + if (vec_len (im6->fib_index_by_sw_if_index) > si->sw_if_index) + fib_index6 = vec_elt (im6->fib_index_by_sw_if_index, + si->sw_if_index); + + fib4 = vec_elt_at_index (im4->fibs, fib_index4); + fib6 = vec_elt_at_index (im6->fibs, fib_index6); + + if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + vlib_cli_output + (vm, "%U (%s): \n unnumbered, use %U", + format_vnet_sw_if_index_name, + vnm, si->sw_if_index, + (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? "up" : "dn", + format_vnet_sw_if_index_name, + vnm, si->unnumbered_sw_if_index); + + else + { + vlib_cli_output (vm, "%U (%s):", + format_vnet_sw_if_index_name, + vnm, si->sw_if_index, + (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + ? "up" : "dn"); + } + + /* Display any L2 addressing info */ + vec_validate(l2m->configs, si->sw_if_index); + config = vec_elt_at_index(l2m->configs, si->sw_if_index); + if (config->bridge) + { + u32 bd_id = l2input_main.bd_configs[config->bd_index].bd_id; + vlib_cli_output (vm, " l2 bridge bd_id %d%s%d", bd_id, + config->bvi ? 
" bvi shg " : " shg ", config->shg); + } + else if (config->xconnect) + { + vlib_cli_output (vm, " l2 xconnect %U", + format_vnet_sw_if_index_name, + vnm, config->output_sw_if_index); + } + + /* Display any IP4 addressing info */ + foreach_ip_interface_address (lm4, ia, si->sw_if_index, + 1 /* honor unnumbered */, + ({ + r4 = ip_interface_address_get_address (lm4, ia); + if (fib4->table_id) + { + vlib_cli_output (vm, " %U/%d table %d", + format_ip4_address, r4, + ia->address_length, + fib4->table_id); + } + else + { + vlib_cli_output (vm, " %U/%d", + format_ip4_address, r4, + ia->address_length); + } + })); + + /* Display any IP6 addressing info */ + foreach_ip_interface_address (lm6, ia, si->sw_if_index, + 1 /* honor unnumbered */, + ({ + r6 = ip_interface_address_get_address (lm6, ia); + if (fib6->table_id) + { + vlib_cli_output (vm, " %U/%d table %d", + format_ip6_address, r6, + ia->address_length, + fib6->table_id); + } + else + { + vlib_cli_output (vm, " %U/%d", + format_ip6_address, r6, + ia->address_length); + } + })); + } + } + else + { + vec_foreach (si, sorted_sis) + { + vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, si); + } + } + + done: + vec_free (sorted_sis); + return error; +} + +VLIB_CLI_COMMAND (show_sw_interfaces_command, static) = { + .path = "show interfaces", + .short_help = "show interfaces [address|addr] [<if-name1> <if-name2> ...]", + .function = show_sw_interfaces, +}; + +/* Root of all interface commands. */ +VLIB_CLI_COMMAND (vnet_cli_interface_command, static) = { + .path = "interface", + .short_help = "Interface commands", +}; + +VLIB_CLI_COMMAND (vnet_cli_set_interface_command, static) = { + .path = "set interface", + .short_help = "Interface commands", +}; + +static clib_error_t * +clear_interface_counters (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + vlib_simple_counter_main_t * sm; + vlib_combined_counter_main_t * cm; + static vnet_main_t ** my_vnet_mains; + int i, j, n_counters; + + vec_reset_length (my_vnet_mains); + + for (i = 0; i < vec_len (vnet_mains); i++) + { + if (vnet_mains[i]) + vec_add1 (my_vnet_mains, vnet_mains[i]); + } + + if (vec_len (vnet_mains) == 0) + vec_add1 (my_vnet_mains, vnm); + + n_counters = vec_len (im->combined_sw_if_counters); + + for (j = 0; j < n_counters; j++) + { + for (i = 0; i < vec_len(my_vnet_mains); i++) + { + im = &my_vnet_mains[i]->interface_main; + cm = im->combined_sw_if_counters + j; + vlib_clear_combined_counters (cm); + } + } + + n_counters = vec_len (im->sw_if_counters); + + for (j = 0; j < n_counters; j++) + { + for (i = 0; i < vec_len(my_vnet_mains); i++) + { + im = &my_vnet_mains[i]->interface_main; + sm = im->sw_if_counters + j; + vlib_clear_simple_counters (sm); + } + } + + return 0; +} + +VLIB_CLI_COMMAND (clear_interface_counters_command, static) = { + .path = "clear interfaces", + .short_help = "Clear interfaces statistics", + .function = clear_interface_counters, +}; + +// The following subinterface syntax is supported. The first two are for +// backwards compatability: +// +// <intf-name> <id> +// - a subinterface with the name <intf-name>.<id>. The subinterface +// is a single dot1q vlan with vlan id <id> and exact-match semantics. +// +// <intf-name> <min_id>-<max_id> +// - a set of the above subinterfaces, repeating for each id +// in the range <min_id> to <max_id> +// +// In the following, exact-match semantics (i.e. 
the number of vlan tags on the +// packet must match the number of tags in the configuration) are used only if +// the keyword exact-match is present. Non-exact match is the default. +// +// <intf-name> <id> dot1q <outer_id> [exact-match] +// - a subinterface with the name <intf-name>.<id>. The subinterface +// is a single dot1q vlan with vlan id <outer_id>. +// +// <intf-name> <id> dot1q any [exact-match] +// - a subinterface with the name <intf-name>.<id>. The subinterface +// is a single dot1q vlan with any vlan id. +// +// <intf-name> <id> dot1q <outer_id> inner-dot1q <inner_id> [exact-match] +// - a subinterface with the name <intf-name>.<id>. The subinterface +// is a double dot1q vlan with outer vlan id <outer_id> and inner vlan id +// <inner_id>. +// +// <intf-name> <id> dot1q <outer_id> inner-dot1q any [exact-match] +// - a subinterface with the name <intf-name>.<id>. The subinterface +// is a double dot1q vlan with outer vlan id <outer_id> and any inner vlan id. +// +// <intf-name> <id> dot1q any inner-dot1q any [exact-match] +// - a subinterface with the name <intf-name>.<id>. The subinterface +// is a double dot1q vlan with any outer vlan id and any inner vlan id. +// +// For each of the above CLI forms, there is a duplicate that uses the keyword +// "dot1ad" in place of the first "dot1q". These interfaces use ethertype +// 0x88a8 in place of 0x8100 for the outer ethertype. Note that for double- +// tagged packets the inner ethertype is always 0x8100. Also note that +// the dot1q and dot1ad namespaces are independent, so it is legal to +// have both "Gig3/0/0.1 dot1q 100" and "Gig3/0/0.2 dot1ad 100". For example: +// +// <intf-name> <id> dot1ad <outer_id> inner-dot1q <inner_id> [exact-match] +// - a subinterface with the name <intf-name>.<id>. The subinterface +// is a double dot1ad vlan with outer vlan id <outer_id> and inner vlan +// id <inner_id>. +// +// <intf-name> <id> untagged +// - a subinterface with the name <intf-name>.<id>. The subinterface +// has no vlan tags. Only one can be specified per interface. +// +// <intf-name> <id> default +// - a subinterface with the name <intf-name>.<id>. This is associated +// with a packet that did not match any other configured subinterface +// on this interface. Only one can be specified per interface.
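+//
+// As a rough illustrative sketch (the interface name here is hypothetical),
+// the command
+//
+//   create sub-interface GigabitEthernet2/0/0 100 dot1q 200 inner-dot1q 300 exact-match
+//
+// would be handled by create_sub_interfaces() / parse_vlan_sub_interfaces()
+// below, which build a vnet_sw_interface_t template roughly as follows:
+//
+//   template.type                      = VNET_SW_INTERFACE_TYPE_SUB;
+//   template.sup_sw_if_index           = hi->sw_if_index;  /* parent hw interface */
+//   template.sub.id                    = 100;   /* name becomes GigabitEthernet2/0/0.100 */
+//   template.sub.eth.flags.two_tags    = 1;     /* outer + inner tag expected */
+//   template.sub.eth.outer_vlan_id     = 200;
+//   template.sub.eth.inner_vlan_id     = 300;
+//   template.sub.eth.flags.exact_match = 1;     /* tag count must match exactly */
+//
+// The resulting sw_if_index is then recorded both in
+// hi->sub_interface_sw_if_index_by_id (keyed by the sub id 100) and in
+// im->sw_if_index_by_sup_and_sub (keyed by sup sw_if_index and sub id).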
+ + +static clib_error_t * +parse_vlan_sub_interfaces (unformat_input_t * input, + vnet_sw_interface_t * template) +{ + clib_error_t * error = 0; + u32 inner_vlan, outer_vlan; + + if (unformat (input, "any inner-dot1q any")) { + template->sub.eth.flags.two_tags = 1; + template->sub.eth.flags.outer_vlan_id_any = 1; + template->sub.eth.flags.inner_vlan_id_any = 1; + } else if (unformat (input, "any")) { + template->sub.eth.flags.one_tag = 1; + template->sub.eth.flags.outer_vlan_id_any = 1; + } else if (unformat (input, "%d inner-dot1q any", &outer_vlan)) { + template->sub.eth.flags.two_tags = 1; + template->sub.eth.flags.inner_vlan_id_any = 1; + template->sub.eth.outer_vlan_id = outer_vlan; + } else if (unformat (input, "%d inner-dot1q %d", &outer_vlan, &inner_vlan)) { + template->sub.eth.flags.two_tags = 1; + template->sub.eth.outer_vlan_id = outer_vlan; + template->sub.eth.inner_vlan_id = inner_vlan; + } else if (unformat (input, "%d", &outer_vlan)) { + template->sub.eth.flags.one_tag = 1; + template->sub.eth.outer_vlan_id = outer_vlan; + } else { + error = clib_error_return (0, "expected dot1q config, got `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "exact-match")) { + template->sub.eth.flags.exact_match = 1; + } + } + + done: + return error; +} + +static clib_error_t * +create_sub_interfaces (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 hw_if_index, sw_if_index; + vnet_hw_interface_t * hi; + u32 id, id_min, id_max; + vnet_sw_interface_t template; + + hw_if_index = ~0; + if (! unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + memset (&template, 0, sizeof (template)); + template.sub.eth.raw_flags = 0; + + if (unformat (input, "%d default", &id_min)) { + id_max = id_min; + template.sub.eth.flags.default_sub = 1; + } else if (unformat (input, "%d untagged", &id_min)) { + id_max = id_min; + template.sub.eth.flags.no_tags = 1; + template.sub.eth.flags.exact_match = 1; + } else if (unformat (input, "%d dot1q", &id_min)) { + // parse dot1q config + id_max = id_min; + error = parse_vlan_sub_interfaces(input, &template); + if (error) goto done; + } else if (unformat (input, "%d dot1ad", &id_min)) { + // parse dot1ad config + id_max = id_min; + template.sub.eth.flags.dot1ad = 1; + error = parse_vlan_sub_interfaces(input, &template); + if (error) goto done; + } else if (unformat (input, "%d-%d", &id_min, &id_max)) { + template.sub.eth.flags.one_tag = 1; + template.sub.eth.outer_vlan_id = id_min; + template.sub.eth.flags.exact_match = 1; + if (id_min > id_max) + goto id_error; + } else if (unformat (input, "%d", &id_min)) { + id_max = id_min; + template.sub.eth.flags.one_tag = 1; + template.sub.eth.outer_vlan_id = id_min; + template.sub.eth.flags.exact_match = 1; + } else { + id_error: + error = clib_error_return (0, "expected ID or ID MIN-MAX, got `%U'", + format_unformat_error, input); + goto done; + } + + /* + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + error = clib_error_return (0, "unexpected text `%U'", + format_unformat_error, input); + goto done; + } + */ + + hi = vnet_get_hw_interface (vnm, hw_if_index); + for (id = id_min; id <= id_max; id++) + { + uword * p; + vnet_interface_main_t * im = &vnm->interface_main; + u64 sup_and_sub_key = 
((u64)(hi->sw_if_index) << 32) | + (u64) id; + u64 * kp; + + p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key); + if (p) + { + if (CLIB_DEBUG > 0) + clib_warning ("sup sw_if_index %d, sub id %d already exists\n", + hi->sw_if_index, id); + continue; + } + + kp = clib_mem_alloc (sizeof (*kp)); + *kp = sup_and_sub_key; + + template.type = VNET_SW_INTERFACE_TYPE_SUB; + template.sup_sw_if_index = hi->sw_if_index; + template.sub.id = id; + error = vnet_create_sw_interface (vnm, &template, &sw_if_index); + if (error) goto done; + hash_set (hi->sub_interface_sw_if_index_by_id, id, sw_if_index); + hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, sw_if_index); + } + + if (error) + goto done; + + done: + return error; +} + +VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = { + .path = "create sub-interface", + .short_help = "create sub-interfaces <nn>[-<nn>] [dot1q|dot1ad|default|untagged]", + .function = create_sub_interfaces, +}; + +static clib_error_t * +set_state (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error; + u32 sw_if_index, flags; + + sw_if_index = ~0; + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (! unformat (input, "%U", unformat_vnet_sw_interface_flags, &flags)) + { + error = clib_error_return (0, "unknown flags `%U'", + format_unformat_error, input); + goto done; + } + + error = vnet_sw_interface_set_flags (vnm, sw_if_index, flags); + if (error) + goto done; + + done: + return error; +} + +VLIB_CLI_COMMAND (set_state_command, static) = { + .path = "set interface state", + .short_help = "Set interface state", + .function = set_state, +}; + +static clib_error_t * +set_unnumbered (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 unnumbered_sw_if_index; + u32 inherit_from_sw_if_index; + vnet_sw_interface_t * si; + int is_set = 0; + int is_del = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat (input, "%U use %U", + unformat_vnet_sw_interface, vnm, &unnumbered_sw_if_index, + unformat_vnet_sw_interface, vnm, &inherit_from_sw_if_index)) + is_set = 1; + else if (unformat (input, "del %U", + unformat_vnet_sw_interface, + vnm, &unnumbered_sw_if_index)) + is_del = 1; + else + { + if (is_set || is_del) + break; + else + return clib_error_return + (0, "parse error '%U'", format_unformat_error, input); + } + } + + si = vnet_get_sw_interface (vnm, unnumbered_sw_if_index); + if (is_del) { + si->flags &= ~(VNET_SW_INTERFACE_FLAG_UNNUMBERED); + si->unnumbered_sw_if_index = (u32)~0; + } else { + si->flags |= VNET_SW_INTERFACE_FLAG_UNNUMBERED; + si->unnumbered_sw_if_index = inherit_from_sw_if_index; + } + + return 0; +} + +VLIB_CLI_COMMAND (set_unnumbered_command, static) = { + .path = "set interface unnumbered", + .short_help = "set interface unnumbered [<intfc> use <intfc>][del <intfc>]", + .function = set_unnumbered, +}; + + + +static clib_error_t * +set_hw_class (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + clib_error_t * error; + u32 hw_if_index, hw_class_index; + + hw_if_index = ~0; + if (! 
unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index)) + { + error = clib_error_return (0, "unknown hardware interface `%U'", + format_unformat_error, input); + goto done; + } + + if (! unformat_user (input, unformat_hash_string, + im->hw_interface_class_by_name, &hw_class_index)) + { + error = clib_error_return (0, "unknown hardware class `%U'", + format_unformat_error, input); + goto done; + } + + error = vnet_hw_interface_set_class (vnm, hw_if_index, hw_class_index); + if (error) + goto done; + + done: + return error; +} + +VLIB_CLI_COMMAND (set_hw_class_command, static) = { + .path = "set interface hw-class", + .short_help = "Set interface hardware class", + .function = set_hw_class, +}; + +static clib_error_t * vnet_interface_cli_init (vlib_main_t * vm) +{ return 0; } + +VLIB_INIT_FUNCTION (vnet_interface_cli_init); + +static clib_error_t * +renumber_interface_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 hw_if_index; + u32 new_dev_instance; + vnet_main_t * vnm = vnet_get_main(); + int rv; + + if (! unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index)) + return clib_error_return (0, "unknown hardware interface `%U'", + format_unformat_error, input); + + if (! unformat (input, "%d", &new_dev_instance)) + return clib_error_return (0, "new dev instance missing"); + + rv = vnet_interface_name_renumber (hw_if_index, new_dev_instance); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "vnet_interface_name_renumber returned %d", + rv); + + } + + return 0; +} + + +VLIB_CLI_COMMAND (renumber_interface_command, static) = { + .path = "renumber interface", + .short_help = "renumber interface <if-name> <new-dev-instance>", + .function = renumber_interface_command_fn, +}; + diff --git a/vnet/vnet/interface_format.c b/vnet/vnet/interface_format.c new file mode 100644 index 00000000000..56faea27217 --- /dev/null +++ b/vnet/vnet/interface_format.c @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * interface_format.c: interface formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> + +u8 * format_vnet_sw_interface_flags (u8 * s, va_list * args) +{ + u32 flags = va_arg (*args, u32); + + s = format (s, "%s", (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? "up" : "down"); + if (flags & VNET_SW_INTERFACE_FLAG_PUNT) + s = format (s, "/punt"); + + return s; +} + +u8 * format_vnet_hw_interface (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + vnet_hw_interface_t * hi = va_arg (*args, vnet_hw_interface_t *); + vnet_hw_interface_class_t * hw_class; + vnet_device_class_t * dev_class; + int verbose = va_arg (*args, int); + uword indent; + + if (! hi) + return format (s, "%=32s%=6s%=8s%s", + "Name", "Idx", "Link", "Hardware"); + + indent = format_get_indent (s); + + s = format (s, "%-32v%=6d%=8s", + hi->name, hi->hw_if_index, + hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP ? "up" : "down"); + + hw_class = vnet_get_hw_interface_class (vnm, hi->hw_class_index); + dev_class = vnet_get_device_class (vnm, hi->dev_class_index); + + if (dev_class->format_device_name) + s = format (s, "%U", dev_class->format_device_name, hi->dev_instance); + else + s = format (s, "%s%d", dev_class->name, hi->dev_instance); + + if (verbose) + { + if (hw_class->format_device) + s = format (s, "\n%U%U", + format_white_space, indent + 2, + hw_class->format_device, hi->hw_if_index, verbose); + else + { + s = format (s, "\n%U%s", + format_white_space, indent + 2, + hw_class->name); + if (hw_class->format_address && vec_len (hi->hw_address) > 0) + s = format (s, " address %U", hw_class->format_address, hi->hw_address); + } + + if (dev_class->format_device) + s = format (s, "\n%U%U", + format_white_space, indent + 2, + dev_class->format_device, hi->dev_instance, verbose); + } + + return s; +} + +u8 * format_vnet_sw_interface_name (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + vnet_sw_interface_t * si = va_arg (*args, vnet_sw_interface_t *); + vnet_sw_interface_t * si_sup = vnet_get_sup_sw_interface (vnm, si->sw_if_index); + vnet_hw_interface_t * hi_sup; + + ASSERT (si_sup->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + hi_sup = vnet_get_hw_interface (vnm, si_sup->hw_if_index); + + s = format (s, "%v", hi_sup->name); + + if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + s = format (s, ".%d", si->sub.id); + + return s; +} + +u8 * format_vnet_sw_if_index_name (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + u32 sw_if_index = va_arg (*args, u32); + return format (s, "%U", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, sw_if_index)); +} + +u8 * format_vnet_sw_interface (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + vnet_sw_interface_t * si = va_arg (*args, vnet_sw_interface_t *); + vnet_interface_main_t * im = &vnm->interface_main; + uword indent, n_printed; + int i, j, n_counters; + static vnet_main_t ** my_vnet_mains; + + if (! 
si) + return format (s, "%=32s%=5s%=16s%=16s%=16s", + "Name", "Idx", "State", "Counter", "Count"); + + s = format (s, "%-32U%=5d%=16U", + format_vnet_sw_interface_name, vnm, si, si->sw_if_index, + format_vnet_sw_interface_flags, si->flags); + + vec_reset_length (my_vnet_mains); + + indent = format_get_indent (s); + n_printed = 0; + + { + vlib_combined_counter_main_t * cm; + vlib_counter_t v, vtotal; + u8 * n = 0; + + for (i = 0; i < vec_len (vnet_mains); i++) + { + if (vnet_mains[i]) + vec_add1 (my_vnet_mains, vnet_mains[i]); + } + + if (vec_len(my_vnet_mains) == 0) + vec_add1 (my_vnet_mains, &vnet_main); + + /* Each vnet_main_t has its own copy of the interface counters */ + n_counters = vec_len (im->combined_sw_if_counters); + + /* rx, tx counters... */ + for (j = 0; j < n_counters; j++) + { + vtotal.packets = 0; + vtotal.bytes = 0; + + for (i = 0; i < vec_len(my_vnet_mains); i++) + { + im = &my_vnet_mains[i]->interface_main; + cm = im->combined_sw_if_counters + j; + vlib_get_combined_counter (cm, si->sw_if_index, &v); + vtotal.packets += v.packets; + vtotal.bytes += v.bytes; + } + + /* Only display non-zero counters. */ + if (vtotal.packets == 0) + continue; + + if (n_printed > 0) + s = format (s, "\n%U", format_white_space, indent); + n_printed += 2; + + if (n) + _vec_len (n) = 0; + n = format (n, "%s packets", cm->name); + s = format (s, "%-16v%16Ld", n, vtotal.packets); + + _vec_len (n) = 0; + n = format (n, "%s bytes", cm->name); + s = format (s, "\n%U%-16v%16Ld", + format_white_space, indent, + n, vtotal.bytes); + } + vec_free (n); + } + + { + vlib_simple_counter_main_t * cm; + u64 v, vtotal ; + + n_counters = vec_len (im->sw_if_counters); + + for (j = 0; j < n_counters; j++) + { + vtotal = 0; + + for (i = 0; i < vec_len(my_vnet_mains); i++) + { + im = &my_vnet_mains[i]->interface_main; + cm = im->sw_if_counters + j; + + v = vlib_get_simple_counter (cm, si->sw_if_index); + vtotal += v; + } + + /* Only display non-zero counters. */ + if (vtotal == 0) + continue; + + if (n_printed > 0) + s = format (s, "\n%U", format_white_space, indent); + n_printed += 1; + + s = format (s, "%-16s%16Ld", cm->name, vtotal); + } + } + + return s; +} + +uword unformat_vnet_hw_interface (unformat_input_t * input, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + u32 * hw_if_index = va_arg (*args, u32 *); + vnet_interface_main_t * im = &vnm->interface_main; + vnet_device_class_t * c; + + /* Try per device class functions first. */ + vec_foreach (c, im->device_classes) + { + if (c->unformat_device_name + && unformat_user (input, c->unformat_device_name, hw_if_index)) + return 1; + } + + return unformat_user (input, unformat_hash_vec_string, + im->hw_interface_by_name, hw_if_index); +} + +uword unformat_vnet_sw_interface (unformat_input_t * input, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + u32 * result = va_arg (*args, u32 *); + vnet_hw_interface_t * hi; + u32 hw_if_index, id, id_specified; + u8 * if_name = 0; + uword * p, error = 0; + + id = ~0; + if (unformat (input, "%_%v.%d%_", &if_name, &id) + && ((p = hash_get (vnm->interface_main.hw_interface_by_name, if_name)))) + { + hw_if_index = p[0]; + id_specified = 1; + } + else if (unformat (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + id_specified = 0; + else + goto done; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + if (! id_specified) + { + *result = hi->sw_if_index; + } + else + { + if (! 
(p = hash_get (hi->sub_interface_sw_if_index_by_id, id))) + return 0; + *result = p[0]; + } + error = 1; + done: + vec_free (if_name); + return error; +} + +uword unformat_vnet_sw_interface_flags (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 flags = 0; + + if (unformat (input, "up")) + flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP; + else if (unformat (input, "down")) + flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP; + else if (unformat (input, "punt")) + flags |= VNET_SW_INTERFACE_FLAG_PUNT; + else if (unformat (input, "enable")) + flags &= ~VNET_SW_INTERFACE_FLAG_PUNT; + else + return 0; + + *result = flags; + return 1; +} + +uword unformat_vnet_hw_interface_flags (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 flags = 0; + + if (unformat (input, "up")) + flags |= VNET_HW_INTERFACE_FLAG_LINK_UP; + else if (unformat (input, "down")) + flags &= ~VNET_HW_INTERFACE_FLAG_LINK_UP; + else + return 0; + + *result = flags; + return 1; +} + diff --git a/vnet/vnet/interface_funcs.h b/vnet/vnet/interface_funcs.h new file mode 100644 index 00000000000..f3d3247fd5d --- /dev/null +++ b/vnet/vnet/interface_funcs.h @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * interface_funcs.h: VNET interfaces/sub-interfaces exported functions + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_vnet_interface_funcs_h +#define included_vnet_interface_funcs_h + +always_inline vnet_hw_interface_t * +vnet_get_hw_interface (vnet_main_t * vnm, u32 hw_if_index) +{ return pool_elt_at_index (vnm->interface_main.hw_interfaces, hw_if_index); } + +always_inline vnet_sw_interface_t * +vnet_get_sw_interface (vnet_main_t * vnm, u32 sw_if_index) +{ return pool_elt_at_index (vnm->interface_main.sw_interfaces, sw_if_index); } + +always_inline vnet_sw_interface_t * +vnet_get_hw_sw_interface (vnet_main_t * vnm, u32 hw_if_index) +{ + vnet_hw_interface_t * hw = vnet_get_hw_interface (vnm, hw_if_index); + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, hw->sw_if_index); + ASSERT (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + return sw; +} + +always_inline vnet_sw_interface_t * +vnet_get_sup_sw_interface (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, sw_if_index); + if (sw->type == VNET_SW_INTERFACE_TYPE_SUB) + sw = vnet_get_sw_interface (vnm, sw->sup_sw_if_index); + return sw; +} + +always_inline vnet_hw_interface_t * +vnet_get_sup_hw_interface (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_sw_interface_t * sw = vnet_get_sup_sw_interface (vnm, sw_if_index); + ASSERT (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + return vnet_get_hw_interface (vnm, sw->hw_if_index); +} + +always_inline vnet_hw_interface_class_t * +vnet_get_hw_interface_class (vnet_main_t * vnm, u32 hw_class_index) +{ return vec_elt_at_index (vnm->interface_main.hw_interface_classes, hw_class_index); } + +always_inline vnet_device_class_t * +vnet_get_device_class (vnet_main_t * vnm, u32 dev_class_index) +{ return vec_elt_at_index (vnm->interface_main.device_classes, dev_class_index); } + +/* Register a hardware interface instance. */ +u32 vnet_register_interface (vnet_main_t * vnm, + u32 dev_class_index, + u32 dev_instance, + u32 hw_class_index, + u32 hw_instance); + +/* Creates a software interface given template. */ +clib_error_t * +vnet_create_sw_interface (vnet_main_t * vnm, vnet_sw_interface_t * template, u32 * sw_if_index); + +void vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index); +void vnet_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index); + +always_inline uword +vnet_sw_interface_get_flags (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, sw_if_index); + return sw->flags; +} + +always_inline uword +vnet_sw_interface_is_admin_up (vnet_main_t * vnm, u32 sw_if_index) +{ return (vnet_sw_interface_get_flags (vnm, sw_if_index) & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; } + +always_inline uword +vnet_hw_interface_get_flags (vnet_main_t * vnm, u32 hw_if_index) +{ + vnet_hw_interface_t * hw = vnet_get_hw_interface (vnm, hw_if_index); + return hw->flags; +} + +always_inline uword +vnet_hw_interface_is_link_up (vnet_main_t * vnm, u32 hw_if_index) +{ return (vnet_hw_interface_get_flags (vnm, hw_if_index) & VNET_HW_INTERFACE_FLAG_LINK_UP) != 0; } + +always_inline vlib_frame_t * +vnet_get_frame_to_sw_interface (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + return vlib_get_frame_to_node (vnm->vlib_main, hw->output_node_index); +} + +always_inline void +vnet_put_frame_to_sw_interface (vnet_main_t * vnm, u32 sw_if_index, vlib_frame_t * f) +{ + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + return vlib_put_frame_to_node (vnm->vlib_main, hw->output_node_index, f); +} + +/* Change interface flags (e.g. 
up, down, enable, disable). */ +clib_error_t * +vnet_hw_interface_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags); + +/* Change interface flags (e.g. up, down, enable, disable). */ +clib_error_t * +vnet_sw_interface_set_flags (vnet_main_t * vnm, u32 sw_if_index, u32 flags); + +/* Change interface class. */ +clib_error_t * +vnet_hw_interface_set_class (vnet_main_t * vnm, u32 hw_if_index, u32 new_hw_class_index); + +/* Redirect rx pkts to node */ +int vnet_hw_interface_rx_redirect_to_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index); + +void vnet_hw_interface_init_for_class (vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index, u32 hw_instance); + +/* Formats sw/hw interface. */ +format_function_t format_vnet_hw_interface; +format_function_t format_vnet_sw_interface; +format_function_t format_vnet_sw_interface_name; +format_function_t format_vnet_sw_if_index_name; +format_function_t format_vnet_sw_interface_flags; + +/* Parses sw/hw interface name -> index. */ +unformat_function_t unformat_vnet_sw_interface; +unformat_function_t unformat_vnet_hw_interface; + +/* Parses interface flags (up, down, enable, disable, etc.) */ +unformat_function_t unformat_vnet_hw_interface_flags; +unformat_function_t unformat_vnet_sw_interface_flags; + +/* Node runtime for interface output function. */ +typedef struct { + u32 hw_if_index; + u32 sw_if_index; + u32 dev_instance; + u32 is_deleted; +} vnet_interface_output_runtime_t; + +/* Interface output functions. */ +uword +vnet_interface_output_node (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); +uword +vnet_interface_output_node_no_flatten (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); + +word vnet_sw_interface_compare (vnet_main_t * vnm, uword sw_if_index0, uword sw_if_index1); +word vnet_hw_interface_compare (vnet_main_t * vnm, uword hw_if_index0, uword hw_if_index1); + +typedef enum { +#define _(sym,str) VNET_INTERFACE_OUTPUT_NEXT_##sym, + foreach_intf_output_feat +#undef _ + VNET_INTERFACE_OUTPUT_NEXT_DROP, + VNET_INTERFACE_OUTPUT_NEXT_TX, +} vnet_interface_output_next_t; + +typedef enum { + VNET_INTERFACE_TX_NEXT_DROP, + VNET_INTERFACE_TX_N_NEXT, +} vnet_interface_tx_next_t; + +typedef enum { + VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN, + VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED, +} vnet_interface_output_error_t; + +/* Format for interface output traces. */ +u8 * format_vnet_interface_output_trace (u8 * s, va_list * va); + +serialize_function_t serialize_vnet_interface_state, unserialize_vnet_interface_state; + +#endif /* included_vnet_interface_funcs_h */ diff --git a/vnet/vnet/interface_output.c b/vnet/vnet/interface_output.c new file mode 100644 index 00000000000..84dc0392a51 --- /dev/null +++ b/vnet/vnet/interface_output.c @@ -0,0 +1,1311 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * interface_output.c: interface output node + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> + +typedef struct { + u32 sw_if_index; + u8 data[64 - sizeof (u32)]; +} interface_output_trace_t; + +u8 * format_vnet_interface_output_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + vlib_node_t * node = va_arg (*va, vlib_node_t *); + interface_output_trace_t * t = va_arg (*va, interface_output_trace_t *); + vnet_main_t * vnm = vnet_get_main(); + vnet_sw_interface_t * si; + uword indent; + + if (t->sw_if_index != (u32)~0) + { + si = vnet_get_sw_interface (vnm, t->sw_if_index); + indent = format_get_indent (s); + + s = format (s, "%U\n%U%U", + format_vnet_sw_interface_name, vnm, si, + format_white_space, indent, + node->format_buffer ? node->format_buffer : format_hex_bytes, + t->data, sizeof (t->data)); + } + return s; +} + +static void +vnet_interface_output_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + uword n_buffers) +{ + u32 n_left, * from; + + n_left = n_buffers; + from = vlib_frame_args (frame); + + while (n_left >= 4) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + interface_output_trace_t * t0, * t1; + + /* Prefetch next iteration. 
*/ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + memcpy (t0->data, vlib_buffer_get_current (b0), + sizeof (t0->data)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX]; + memcpy (t1->data, vlib_buffer_get_current (b1), + sizeof (t1->data)); + } + from += 2; + n_left -= 2; + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + interface_output_trace_t * t0; + + bi0 = from[0]; + + b0 = vlib_get_buffer (vm, bi0); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + memcpy (t0->data, vlib_buffer_get_current (b0), + sizeof (t0->data)); + } + from += 1; + n_left -= 1; + } +} + +static never_inline u32 +slow_path (vlib_main_t * vm, + u32 bi, + vlib_buffer_t * b, + u32 n_left_to_tx, + u32 * to_tx, + u32 * n_slow_bytes_result) +{ + /* We've already enqueued a single buffer. */ + u32 n_buffers = 0; + u32 n_slow_bytes = 0; + + while (n_left_to_tx > 0) + { + to_tx[0] = bi; + to_tx += 1; + n_left_to_tx -= 1; + n_buffers += 1; + n_slow_bytes += vlib_buffer_length_in_chain (vm, b); + + /* Be grumpy about zero length buffers for benefit of + driver tx function. */ + ASSERT (b->current_length > 0); + + if (! (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + + bi = b->next_buffer; + b = vlib_get_buffer (vm, bi); + } + + /* Ran out of space in next frame trying to enqueue buffers? */ + if (b->flags & VLIB_BUFFER_NEXT_PRESENT) + return 0; + + *n_slow_bytes_result = n_slow_bytes; + return n_buffers; +} + +/* + * Increment TX stats. Roll up consecutive increments to the same sw_if_index + * into one increment. + */ +static_always_inline +void incr_output_stats (vnet_main_t * vnm, + u32 cpu_index, + u32 length, + u32 sw_if_index, + u32 * last_sw_if_index, + u32 * n_packets, + u32 * n_bytes) { + vnet_interface_main_t * im; + + if (PREDICT_TRUE (sw_if_index == *last_sw_if_index)) { + *n_packets += 1; + *n_bytes += length; + } else { + if (PREDICT_TRUE (*last_sw_if_index != ~0)) { + im = &vnm->interface_main; + + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + cpu_index, + *last_sw_if_index, + *n_packets, + *n_bytes); + } + *last_sw_if_index = sw_if_index; + *n_packets = 1; + *n_bytes = length; + } +} + + +/* Interface output functions. 
*/ +uword +vnet_interface_output_node (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_output_runtime_t * rt = (void *) node->runtime_data; + vnet_sw_interface_t * si; + vnet_hw_interface_t * hi; + u32 n_left_to_tx, * from, * from_end, * to_tx; + u32 n_bytes, n_buffers, n_packets; + u32 last_sw_if_index; + u32 cpu_index = vm->cpu_index; + + n_buffers = frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vnet_interface_output_trace (vm, node, frame, n_buffers); + + from = vlib_frame_args (frame); + + if (rt->is_deleted) + return vlib_error_drop_buffers (vm, node, + from, + /* buffer stride */ 1, + n_buffers, + VNET_INTERFACE_OUTPUT_NEXT_DROP, + node->node_index, + VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED); + + si = vnet_get_sw_interface (vnm, rt->sw_if_index); + hi = vnet_get_sup_hw_interface (vnm, rt->sw_if_index); + if (! (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) || + ! (hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + { + vlib_simple_counter_main_t * cm; + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + vlib_increment_simple_counter (cm, cpu_index, + rt->sw_if_index, n_buffers); + return vlib_error_drop_buffers (vm, node, + from, + /* buffer stride */ 1, + n_buffers, + VNET_INTERFACE_OUTPUT_NEXT_DROP, + node->node_index, + VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN); + } + + from_end = from + n_buffers; + + /* Total byte count of all buffers. */ + n_bytes = 0; + n_packets = 0; + last_sw_if_index = ~0; + + while (from < from_end) + { + /* Get new next frame since previous incomplete frame may have less + than VNET_FRAME_SIZE vectors in it. */ + vlib_get_new_next_frame (vm, node, VNET_INTERFACE_OUTPUT_NEXT_TX, + to_tx, n_left_to_tx); + + while (from + 4 <= from_end && n_left_to_tx >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + + /* Prefetch next iteration. */ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + to_tx[0] = bi0; + to_tx[1] = bi1; + from += 2; + to_tx += 2; + n_left_to_tx -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* Be grumpy about zero length buffers for benefit of + driver tx function. */ + ASSERT (b0->current_length > 0); + ASSERT (b1->current_length > 0); + + if (PREDICT_FALSE ((b0->flags | b1->flags) & VLIB_BUFFER_NEXT_PRESENT)) + { + u32 n_buffers, n_slow_bytes, i; + + /* Undo. */ + from -= 2; + to_tx -= 2; + n_left_to_tx += 2; + + /* Do slow path two times. */ + for (i = 0; i < 2; i++) + { + u32 bi = i ? bi1 : bi0; + vlib_buffer_t * b = i ? b1 : b0; + + n_buffers = slow_path (vm, bi, b, + n_left_to_tx, to_tx, &n_slow_bytes); + + /* Not enough room for single packet? 
*/ + if (n_buffers == 0) + goto put; + + from += 1; + to_tx += n_buffers; + n_left_to_tx -= n_buffers; + incr_output_stats (vnm, cpu_index, n_slow_bytes, + vnet_buffer(b)->sw_if_index[VLIB_TX], + &last_sw_if_index, &n_packets, &n_bytes); + } + } else { + incr_output_stats (vnm, cpu_index, + vlib_buffer_length_in_chain (vm, b0), + vnet_buffer(b0)->sw_if_index[VLIB_TX], + &last_sw_if_index, &n_packets, &n_bytes); + incr_output_stats (vnm, cpu_index, + vlib_buffer_length_in_chain (vm, b0), + vnet_buffer(b1)->sw_if_index[VLIB_TX], + &last_sw_if_index, &n_packets, &n_bytes); + } + } + + while (from + 1 <= from_end && n_left_to_tx >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_tx[0] = bi0; + from += 1; + to_tx += 1; + n_left_to_tx -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* Be grumpy about zero length buffers for benefit of + driver tx function. */ + ASSERT (b0->current_length > 0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + u32 n_buffers, n_slow_bytes; + + /* Undo. */ + from -= 1; + to_tx -= 1; + n_left_to_tx += 1; + + n_buffers = slow_path (vm, bi0, b0, + n_left_to_tx, to_tx, &n_slow_bytes); + + /* Not enough room for single packet? */ + if (n_buffers == 0) + goto put; + + from += 1; + to_tx += n_buffers; + n_left_to_tx -= n_buffers; + } + incr_output_stats (vnm, cpu_index, + vlib_buffer_length_in_chain (vm, b0), + vnet_buffer(b0)->sw_if_index[VLIB_TX], + &last_sw_if_index, &n_packets, &n_bytes); + } + + put: + vlib_put_next_frame (vm, node, VNET_INTERFACE_OUTPUT_NEXT_TX, n_left_to_tx); + } + + /* Final update of interface stats. */ + incr_output_stats (vnm, cpu_index, 0, ~0, /* ~0 will flush stats */ + &last_sw_if_index, &n_packets, &n_bytes); + + return n_buffers; +} + + +uword +vnet_interface_output_node_no_flatten (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_output_runtime_t * rt = (void *) node->runtime_data; + vnet_sw_interface_t * si; + vnet_hw_interface_t * hi; + u32 n_left_to_tx, * from, * from_end, * to_tx; + u32 n_bytes, n_buffers, n_packets; + u32 n_bytes_b0, n_bytes_b1; + u32 cpu_index = vm->cpu_index; + vnet_interface_main_t * im = &vnm->interface_main; + u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX; + + n_buffers = frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vnet_interface_output_trace (vm, node, frame, n_buffers); + + from = vlib_frame_args (frame); + + if (rt->is_deleted) + return vlib_error_drop_buffers (vm, node, + from, + /* buffer stride */ 1, + n_buffers, + VNET_INTERFACE_OUTPUT_NEXT_DROP, + node->node_index, + VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED); + + si = vnet_get_sw_interface (vnm, rt->sw_if_index); + hi = vnet_get_sup_hw_interface (vnm, rt->sw_if_index); + if (! (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) || + ! (hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + { + vlib_simple_counter_main_t * cm; + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + vlib_increment_simple_counter (cm, cpu_index, + rt->sw_if_index, n_buffers); + + return vlib_error_drop_buffers (vm, node, + from, + /* buffer stride */ 1, + n_buffers, + VNET_INTERFACE_OUTPUT_NEXT_DROP, + node->node_index, + VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN); + } + + from_end = from + n_buffers; + + /* Total byte count of all buffers. 
*/ + n_bytes = 0; + n_packets = 0; + + while (from < from_end) + { + /* Get new next frame since previous incomplete frame may have less + than VNET_FRAME_SIZE vectors in it. */ + vlib_get_new_next_frame (vm, node, next_index, + to_tx, n_left_to_tx); + + while (from + 4 <= from_end && n_left_to_tx >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 tx_swif0, tx_swif1; + + /* Prefetch next iteration. */ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + to_tx[0] = bi0; + to_tx[1] = bi1; + from += 2; + to_tx += 2; + n_left_to_tx -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* Be grumpy about zero length buffers for benefit of + driver tx function. */ + ASSERT (b0->current_length > 0); + ASSERT (b1->current_length > 0); + + n_bytes_b0 = vlib_buffer_length_in_chain (vm, b0); + n_bytes_b1 = vlib_buffer_length_in_chain (vm, b1); + tx_swif0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + tx_swif1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; + + n_bytes += n_bytes_b0 + n_bytes_b1; + n_packets += 2; + + if (PREDICT_FALSE(si->output_feature_bitmap && + vnet_buffer(b0)->output_features.bitmap != (1 << INTF_OUTPUT_FEAT_DONE))) + { + u32 next0; + vnet_buffer(b0)->output_features.bitmap = si->output_feature_bitmap; + count_trailing_zeros(next0, vnet_buffer(b0)->output_features.bitmap); + vnet_buffer(b0)->output_features.bitmap &= ~(1 << next0); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_tx, + n_left_to_tx, bi0, next0); + } + else + { + vnet_buffer(b0)->output_features.bitmap = 0; + + if (PREDICT_FALSE(tx_swif0 != rt->sw_if_index)) + { + /* update vlan subif tx counts, if required */ + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + cpu_index, + tx_swif0, + 1, + n_bytes_b0); + } + } + + if (PREDICT_FALSE(si->output_feature_bitmap && + vnet_buffer(b1)->output_features.bitmap != (1 << INTF_OUTPUT_FEAT_DONE))) + { + u32 next1; + vnet_buffer(b1)->output_features.bitmap = si->output_feature_bitmap; + count_trailing_zeros(next1, vnet_buffer(b1)->output_features.bitmap); + vnet_buffer(b1)->output_features.bitmap &= ~(1 << next1); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_tx, + n_left_to_tx, bi1, next1); + } + else + { + vnet_buffer(b1)->output_features.bitmap = 0; + + /* update vlan subif tx counts, if required */ + if (PREDICT_FALSE(tx_swif1 != rt->sw_if_index)) + { + + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + cpu_index, + tx_swif1, + 1, + n_bytes_b1); + } + } + + } + + while (from + 1 <= from_end && n_left_to_tx >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + u32 tx_swif0; + + bi0 = from[0]; + to_tx[0] = bi0; + from += 1; + to_tx += 1; + n_left_to_tx -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* Be grumpy about zero length buffers for benefit of + driver tx function. 
*/ + ASSERT (b0->current_length > 0); + + n_bytes_b0 = vlib_buffer_length_in_chain (vm, b0); + tx_swif0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + n_bytes += n_bytes_b0; + n_packets += 1; + + if (PREDICT_FALSE(si->output_feature_bitmap && + vnet_buffer(b0)->output_features.bitmap != (1 << INTF_OUTPUT_FEAT_DONE))) + { + u32 next0; + vnet_buffer(b0)->output_features.bitmap = si->output_feature_bitmap; + count_trailing_zeros(next0, vnet_buffer(b0)->output_features.bitmap); + vnet_buffer(b0)->output_features.bitmap &= ~(1 << next0); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_tx, + n_left_to_tx, bi0, next0); + } + else + { + vnet_buffer(b0)->output_features.bitmap = 0; + + if (PREDICT_FALSE(tx_swif0 != rt->sw_if_index)) + { + + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + cpu_index, + tx_swif0, + 1, + n_bytes_b0); + } + } + } + + vlib_put_next_frame (vm, node, next_index, + n_left_to_tx); + } + + /* Update main interface stats. */ + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + cpu_index, + rt->sw_if_index, + n_packets, + n_bytes); + return n_buffers; +} + + +/* Use buffer's sw_if_index[VNET_TX] to choose output interface. */ +static uword +vnet_per_buffer_interface_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 n_left_to_next, * from, * to_next; + u32 n_left_from, next_index; + + n_left_from = frame->n_vectors; + + from = vlib_frame_args (frame); + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1, next0, next1; + vlib_buffer_t * b0, * b1; + vnet_hw_interface_t * hi0, * hi1; + + /* Prefetch next iteration. 
*/ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + hi0 = vnet_get_sup_hw_interface (vnm, vnet_buffer (b0)->sw_if_index[VLIB_TX]); + hi1 = vnet_get_sup_hw_interface (vnm, vnet_buffer (b1)->sw_if_index[VLIB_TX]); + + next0 = hi0->hw_if_index; + next1 = hi1->hw_if_index; + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t * b0; + vnet_hw_interface_t * hi0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + hi0 = vnet_get_sup_hw_interface (vnm, vnet_buffer (b0)->sw_if_index[VLIB_TX]); + + next0 = hi0->hw_if_index; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +always_inline u32 +counter_index (vlib_main_t * vm, vlib_error_t e) +{ + vlib_node_t * n; + u32 ci, ni; + + ni = vlib_error_get_node (e); + n = vlib_get_node (vm, ni); + + ci = vlib_error_get_code (e); + ASSERT (ci < n->n_errors); + + ci += n->error_heap_index; + + return ci; +} + +static u8 * format_vnet_error_trace (u8 * s, va_list * va) +{ + vlib_main_t * vm = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + vlib_error_t * e = va_arg (*va, vlib_error_t *); + vlib_node_t * error_node; + vlib_error_main_t * em = &vm->error_main; + u32 i; + + error_node = vlib_get_node (vm, vlib_error_get_node (e[0])); + i = counter_index (vm, e[0]); + s = format (s, "%v: %s", error_node->name, em->error_strings_heap[i]); + + return s; +} + +static void +trace_errors_with_buffers (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left, * buffers; + + buffers = vlib_frame_vector_args (frame); + n_left = frame->n_vectors; + + while (n_left >= 4) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + vlib_error_t * t0, * t1; + + /* Prefetch next iteration. 
*/ + vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD); + vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD); + + bi0 = buffers[0]; + bi1 = buffers[1]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0[0] = b0->error; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1[0] = b1->error; + } + buffers += 2; + n_left -= 2; + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + vlib_error_t * t0; + + bi0 = buffers[0]; + + b0 = vlib_get_buffer (vm, bi0); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0[0] = b0->error; + } + buffers += 1; + n_left -= 1; + } +} + +static u8 * +validate_error (vlib_main_t * vm, vlib_error_t * e, u32 index) +{ + uword node_index = vlib_error_get_node (e[0]); + uword code = vlib_error_get_code (e[0]); + vlib_node_t * n; + + if (node_index >= vec_len (vm->node_main.nodes)) + return format (0, "[%d], node index out of range 0x%x, error 0x%x", + index, node_index, e[0]); + + n = vlib_get_node (vm, node_index); + if (code >= n->n_errors) + return format (0, "[%d], code %d out of range for node %v", + index, code, n->name); + + return 0; +} + +static u8 * +validate_error_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + u32 * buffers = vlib_frame_args (f); + vlib_buffer_t * b; + u8 * msg = 0; + uword i; + + for (i = 0; i < f->n_vectors; i++) + { + b = vlib_get_buffer (vm, buffers[i]); + msg = validate_error (vm, &b->error, i); + if (msg) + return msg; + } + + return msg; +} + +typedef enum { + VNET_ERROR_DISPOSITION_DROP, + VNET_ERROR_DISPOSITION_PUNT, + VNET_ERROR_N_DISPOSITION, +} vnet_error_disposition_t; + +always_inline void +do_packet (vlib_main_t * vm, vlib_error_t a) +{ + vlib_error_main_t * em = &vm->error_main; + u32 i = counter_index (vm, a); + em->counters[i] += 1; + vlib_error_elog_count (vm, i, 1); +} + +static_always_inline uword +process_drop_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vnet_error_disposition_t disposition) +{ + vnet_main_t * vnm = vnet_get_main(); + vlib_error_main_t * em = &vm->error_main; + u32 * buffers, * first_buffer; + vlib_error_t current_error; + u32 current_counter_index, n_errors_left; + u32 current_sw_if_index, n_errors_current_sw_if_index; + u64 current_counter; + vlib_simple_counter_main_t * cm; + u32 cpu_index = vm->cpu_index; + + static vlib_error_t memory[VNET_ERROR_N_DISPOSITION]; + static char memory_init[VNET_ERROR_N_DISPOSITION]; + + buffers = vlib_frame_args (frame); + first_buffer = buffers; + + { + vlib_buffer_t * b = vlib_get_buffer (vm, first_buffer[0]); + + if (! memory_init[disposition]) + { + memory_init[disposition] = 1; + memory[disposition] = b->error; + } + + current_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + n_errors_current_sw_if_index = 0; + } + + current_error = memory[disposition]; + current_counter_index = counter_index (vm, memory[disposition]); + current_counter = em->counters[current_counter_index]; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + trace_errors_with_buffers (vm, node, frame); + + n_errors_left = frame->n_vectors; + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + (disposition == VNET_ERROR_DISPOSITION_PUNT + ? 
VNET_INTERFACE_COUNTER_PUNT + : VNET_INTERFACE_COUNTER_DROP)); + + while (n_errors_left >= 2) + { + vlib_buffer_t * b0, * b1; + vnet_sw_interface_t * sw_if0, * sw_if1; + vlib_error_t e0, e1; + u32 bi0, bi1; + u32 sw_if_index0, sw_if_index1; + + bi0 = buffers[0]; + bi1 = buffers[1]; + + buffers += 2; + n_errors_left -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + e0 = b0->error; + e1 = b1->error; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + /* Speculate that sw_if_index == sw_if_index[01]. */ + n_errors_current_sw_if_index += 2; + + /* Speculatively assume all 2 (node, code) pairs are equal + to current (node, code). */ + current_counter += 2; + + if (PREDICT_FALSE (e0 != current_error + || e1 != current_error + || sw_if_index0 != current_sw_if_index + || sw_if_index1 != current_sw_if_index)) + { + current_counter -= 2; + n_errors_current_sw_if_index -= 2; + + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + + /* Increment super-interface drop/punt counters for + sub-interfaces. */ + sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0); + vlib_increment_simple_counter + (cm, cpu_index, sw_if0->sup_sw_if_index, + sw_if0->sup_sw_if_index != sw_if_index0); + + sw_if1 = vnet_get_sw_interface (vnm, sw_if_index1); + vlib_increment_simple_counter + (cm, cpu_index, sw_if1->sup_sw_if_index, + sw_if1->sup_sw_if_index != sw_if_index1); + + em->counters[current_counter_index] = current_counter; + do_packet (vm, e0); + do_packet (vm, e1); + + /* For 2 repeated errors, change current error. */ + if (e0 == e1 && e1 != current_error) + { + current_error = e0; + current_counter_index = counter_index (vm, e0); + } + current_counter = em->counters[current_counter_index]; + } + } + + while (n_errors_left >= 1) + { + vlib_buffer_t * b0; + vnet_sw_interface_t * sw_if0; + vlib_error_t e0; + u32 bi0, sw_if_index0; + + bi0 = buffers[0]; + + buffers += 1; + n_errors_left -= 1; + current_counter += 1; + + b0 = vlib_get_buffer (vm, bi0); + e0 = b0->error; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + /* Increment drop/punt counters. */ + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + + /* Increment super-interface drop/punt counters for sub-interfaces. */ + sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0); + vlib_increment_simple_counter (cm, cpu_index, sw_if0->sup_sw_if_index, + sw_if0->sup_sw_if_index != sw_if_index0); + + if (PREDICT_FALSE (e0 != current_error)) + { + current_counter -= 1; + + vlib_error_elog_count (vm, current_counter_index, + (current_counter + - em->counters[current_counter_index])); + + em->counters[current_counter_index] = current_counter; + + do_packet (vm, e0); + current_error = e0; + current_counter_index = counter_index (vm, e0); + current_counter = em->counters[current_counter_index]; + } + } + + if (n_errors_current_sw_if_index > 0) + { + vnet_sw_interface_t * si; + + vlib_increment_simple_counter (cm, cpu_index, current_sw_if_index, + n_errors_current_sw_if_index); + + si = vnet_get_sw_interface (vnm, current_sw_if_index); + if (si->sup_sw_if_index != current_sw_if_index) + vlib_increment_simple_counter (cm, cpu_index, si->sup_sw_if_index, + n_errors_current_sw_if_index); + } + + vlib_error_elog_count (vm, current_counter_index, + (current_counter + - em->counters[current_counter_index])); + + /* Return cached counter. 
*/ + em->counters[current_counter_index] = current_counter; + + /* Save memory for next iteration. */ + memory[disposition] = current_error; + + if (disposition == VNET_ERROR_DISPOSITION_DROP + || ! vm->os_punt_frame) + { + vlib_buffer_free + (vm, + first_buffer, + frame->n_vectors); + + /* If there is no punt function, free the frame as well. */ + if (disposition == VNET_ERROR_DISPOSITION_PUNT && ! vm->os_punt_frame) + vlib_frame_free (vm, node, frame); + } + else + vm->os_punt_frame (vm, node, frame); + + return frame->n_vectors; +} + +static inline void +pcap_drop_trace (vlib_main_t * vm, + vnet_interface_main_t * im, + vlib_frame_t * f) +{ + u32 * from; + u32 n_left = f->n_vectors; + vlib_buffer_t * b0, * p1; + u32 bi0; + i16 save_current_data; + u16 save_current_length; + + from = vlib_frame_vector_args (f); + + while (n_left > 0) + { + if (PREDICT_TRUE (n_left > 1)) + { + p1 = vlib_get_buffer (vm, from[1]); + vlib_prefetch_buffer_header (p1, LOAD); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + from++; + n_left--; + + /* See if we're pointedly ignoring this specific error */ + if (im->pcap_drop_filter_hash + && hash_get (im->pcap_drop_filter_hash, b0->error)) + continue; + + /* Trace all drops, or drops received on a specific interface */ + if (im->pcap_sw_if_index == 0 || + im->pcap_sw_if_index == vnet_buffer(b0)->sw_if_index [VLIB_RX]) + { + save_current_data = b0->current_data; + save_current_length = b0->current_length; + + /* + * Typically, we'll need to rewind the buffer + */ + if (b0->current_data > 0) + vlib_buffer_advance (b0, (word) -b0->current_data); + + pcap_add_buffer (&im->pcap_main, vm, bi0, 512); + + b0->current_data = save_current_data; + b0->current_length = save_current_length; + } + } +} + +void vnet_pcap_drop_trace_filter_add_del (u32 error_index, int is_add) +{ + vnet_interface_main_t * im = &vnet_get_main()->interface_main; + + if (im->pcap_drop_filter_hash == 0) + im->pcap_drop_filter_hash = hash_create (0, sizeof (uword)); + + if (is_add) + hash_set (im->pcap_drop_filter_hash, error_index, 1); + else + hash_unset (im->pcap_drop_filter_hash, error_index); +} + +static uword +process_drop (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_interface_main_t * im = &vnet_get_main()->interface_main; + + if (PREDICT_FALSE (im->drop_pcap_enable)) + pcap_drop_trace (vm, im, frame); + + return process_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_DROP); +} + +static uword +process_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return process_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_PUNT); +} + +VLIB_REGISTER_NODE (drop_buffers,static) = { + .function = process_drop, + .name = "error-drop", + .flags = VLIB_NODE_FLAG_IS_DROP, + .vector_size = sizeof (u32), + .format_trace = format_vnet_error_trace, + .validate_frame = validate_error_frame, +}; + +VLIB_REGISTER_NODE (punt_buffers,static) = { + .function = process_punt, + .flags = (VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH + | VLIB_NODE_FLAG_IS_PUNT), + .name = "error-punt", + .vector_size = sizeof (u32), + .format_trace = format_vnet_error_trace, + .validate_frame = validate_error_frame, +}; + +static clib_error_t * +vnet_per_buffer_interface_output_hw_interface_add_del (vnet_main_t * vnm, + u32 hw_if_index, + u32 is_create); + +VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node,static) = { + .function = vnet_per_buffer_interface_output, + .name = "interface-output", + .vector_size = sizeof (u32), +}; + +clib_error_t * 
+vnet_per_buffer_interface_output_hw_interface_add_del (vnet_main_t * vnm, + u32 hw_if_index, + u32 is_create) +{ + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + u32 next_index; + + next_index = vlib_node_add_next_with_slot + (vnm->vlib_main, vnet_per_buffer_interface_output_node.index, + hi->output_node_index, + /* next_index */ hw_if_index); + + ASSERT (next_index == hw_if_index); + + return 0; +} + +VNET_HW_INTERFACE_ADD_DEL_FUNCTION +(vnet_per_buffer_interface_output_hw_interface_add_del); + +static clib_error_t * +pcap_drop_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + u8 * filename; + u32 max; + int matched = 0; + clib_error_t * error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on")) + { + if (im->drop_pcap_enable == 0) + { + if (im->pcap_filename == 0) + im->pcap_filename = format (0, "/tmp/drop.pcap%c", 0); + + memset (&im->pcap_main, 0, sizeof (im->pcap_main)); + im->pcap_main.file_name = (char *) im->pcap_filename; + im->pcap_main.n_packets_to_capture = 100; + if (im->pcap_pkts_to_capture) + im->pcap_main.n_packets_to_capture = im->pcap_pkts_to_capture; + + im->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; + im->drop_pcap_enable = 1; + matched = 1; + vlib_cli_output (vm, "pcap drop capture on..."); + } + else + { + vlib_cli_output (vm, "pcap drop capture already on..."); + } + matched = 1; + } + else if (unformat (input, "off")) + { + matched = 1; + + if (im->drop_pcap_enable) + { + vlib_cli_output (vm, "captured %d pkts...", + im->pcap_main.n_packets_captured); + if (im->pcap_main.n_packets_captured) + { + im->pcap_main.n_packets_to_capture = + im->pcap_main.n_packets_captured; + error = pcap_write (&im->pcap_main); + if (error) + clib_error_report (error); + else + vlib_cli_output (vm, "saved to %s...", im->pcap_filename); + } + } + else + { + vlib_cli_output (vm, "pcap drop capture already off..."); + } + + im->drop_pcap_enable = 0; + } + else if (unformat (input, "max %d", &max)) + { + im->pcap_pkts_to_capture = max; + matched = 1; + } + + else if (unformat (input, "intfc %U", + unformat_vnet_sw_interface, vnm, + &im->pcap_sw_if_index)) + matched = 1; + else if (unformat (input, "intfc any")) + { + im->pcap_sw_if_index = 0; + matched = 1; + } + else if (unformat (input, "file %s", &filename)) + { + u8 * chroot_filename; + /* Brain-police user path input */ + if (strstr((char *)filename, "..") || index((char *)filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + continue; + } + + chroot_filename = format (0, "/tmp/%s%c", filename, 0); + vec_free (filename); + + if (im->pcap_filename) + vec_free (im->pcap_filename); + vec_add1 (filename, 0); + im->pcap_filename = chroot_filename; + matched = 1; + } + else if (unformat (input, "status")) + { + if (im->drop_pcap_enable == 0) + { + vlib_cli_output (vm, "pcap drop capture is off..."); + continue; + } + + vlib_cli_output (vm, "pcap drop capture: %d of %d pkts...", + im->pcap_main.n_packets_captured, + im->pcap_main.n_packets_to_capture); + matched = 1; + } + + else + break; + } + + if (matched == 0) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + return 0; +} + +VLIB_CLI_COMMAND (pcap_trace_command, static) = { + .path = "pcap drop trace", + .short_help = + "pcap drop trace on off max <nn> intfc <intfc> file <name> status", 
+ .function = pcap_drop_trace_command_fn, +}; diff --git a/vnet/vnet/ip/format.c b/vnet/vnet/ip/format.c new file mode 100644 index 00000000000..9dda4c5e10b --- /dev/null +++ b/vnet/vnet/ip/format.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip_format.c: ip generic (4 or 6) formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +/* Format IP protocol. */ +u8 * format_ip_protocol (u8 * s, va_list * args) +{ + ip_protocol_t protocol = va_arg (*args, ip_protocol_t); + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi = ip_get_protocol_info (im, protocol); + + if (pi) + return format (s, "%s", pi->name); + else + return format (s, "unknown %d", protocol); +} + +uword unformat_ip_protocol (unformat_input_t * input, va_list * args) +{ + u8 * result = va_arg (*args, u8 *); + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi; + int i; + + if (! 
unformat_user (input, unformat_vlib_number_by_name, + im->protocol_info_by_name, &i)) + return 0; + + pi = vec_elt_at_index (im->protocol_infos, i); + *result = pi->protocol; + return 1; +} + +u8 * format_tcp_udp_port (u8 * s, va_list * args) +{ + int port = va_arg (*args, int); + ip_main_t * im = &ip_main; + tcp_udp_port_info_t * pi; + + pi = ip_get_tcp_udp_port_info (im, port); + if (pi) + s = format (s, "%s", pi->name); + else + s = format (s, "%d", clib_net_to_host_u16 (port)); + + return s; +} + +uword unformat_tcp_udp_port (unformat_input_t * input, va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + ip_main_t * im = &ip_main; + tcp_udp_port_info_t * pi; + u32 i, port; + + + if (unformat_user (input, unformat_vlib_number_by_name, + im->port_info_by_name, &i)) + { + pi = vec_elt_at_index (im->port_infos, i); + port = pi->port; + } + else if (unformat_user (input, unformat_vlib_number, &port) + && port < (1 << 16)) + port = clib_host_to_net_u16 (port); + + else + return 0; + + *result = port; + return 1; +} + +uword unformat_ip46_address (unformat_input_t * input, va_list * args) +{ + ip46_address_t * a = va_arg (*args, ip46_address_t *); + u32 is_ip6 = va_arg (*args, u32); + if (is_ip6) + return unformat_user (input, unformat_ip6_address, &a->ip6); + else + return unformat_user (input, unformat_ip4_address, &a->ip4); +} diff --git a/vnet/vnet/ip/format.h b/vnet/vnet/ip/format.h new file mode 100644 index 00000000000..511a9346bf6 --- /dev/null +++ b/vnet/vnet/ip/format.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/format.h: ip 4 and/or 6 formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_format_h +#define included_ip_format_h + +/* IP4 or IP6. 
*/ + +format_function_t format_ip_protocol; +unformat_function_t unformat_ip_protocol; + +format_function_t format_tcp_udp_port; +unformat_function_t unformat_tcp_udp_port; + +format_function_t format_ip_adjacency; +format_function_t format_ip_adjacency_packet_data; + +unformat_function_t unformat_ip46_address; + +/* IP4 */ + +/* Parse an IP4 address %d.%d.%d.%d. */ +unformat_function_t unformat_ip4_address; + +/* Format an IP4 address. */ +format_function_t format_ip4_address; +format_function_t format_ip4_address_and_length; + +/* Parse an IP4 header. */ +unformat_function_t unformat_ip4_header; + +/* Format an IP4 header. */ +format_function_t format_ip4_header; + +/* Parse an IP packet matching pattern. */ +unformat_function_t unformat_ip4_match; + +unformat_function_t unformat_pg_ip4_header; + +/* IP6 */ +unformat_function_t unformat_ip6_address; +format_function_t format_ip6_address; +format_function_t format_ip6_address_and_length; +unformat_function_t unformat_ip6_header; +format_function_t format_ip6_header; +unformat_function_t unformat_pg_ip6_header; + +/* Format a TCP/UDP headers. */ +format_function_t format_tcp_header, format_udp_header; + +unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header; + +#endif /* included_ip_format_h */ diff --git a/vnet/vnet/ip/icmp4.c b/vnet/vnet/ip/icmp4.c new file mode 100644 index 00000000000..e21f3bf047b --- /dev/null +++ b/vnet/vnet/ip/icmp4.c @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/icmp4.c: ipv4 icmp + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +static u8 * format_ip4_icmp_type_and_code (u8 * s, va_list * args) +{ + icmp4_type_t type = va_arg (*args, int); + u8 code = va_arg (*args, int); + char * t = 0; + +#define _(n,f) case n: t = #f; break; + + switch (type) + { + foreach_icmp4_type; + + default: + break; + } + +#undef _ + + if (! t) + return format (s, "unknown 0x%x", type); + + s = format (s, "%s", t); + + t = 0; + switch ((type << 8) | code) + { +#define _(a,n,f) case (ICMP4_##a << 8) | (n): t = #f; break; + + foreach_icmp4_code; + +#undef _ + } + + if (t) + s = format (s, " %s", t); + + return s; +} + +static u8 * format_ip4_icmp_header (u8 * s, va_list * args) +{ + icmp46_header_t * icmp = va_arg (*args, icmp46_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + + /* Nothing to do. */ + if (max_header_bytes < sizeof (icmp[0])) + return format (s, "ICMP header truncated"); + + s = format (s, "ICMP %U checksum 0x%x", + format_ip4_icmp_type_and_code, icmp->type, icmp->code, + clib_net_to_host_u16 (icmp->checksum)); + + return s; +} + +typedef struct { + u8 packet_data[64]; +} icmp_input_trace_t; + +static u8 * format_icmp_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + icmp_input_trace_t * t = va_arg (*va, icmp_input_trace_t *); + + s = format (s, "%U", + format_ip4_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + +typedef enum { + ICMP4_ERROR_UNKNOWN_TYPE, + ICMP4_ERROR_ECHO_REPLIES_SENT, + ICMP4_ERROR_TTL_EXPIRE_RESP_SENT, + ICMP4_ERROR_TTL_EXPIRE_RESP_DROP, +} icmp_error_t; + +static char * icmp_error_strings[] = { + [ICMP4_ERROR_UNKNOWN_TYPE] = "unknown type", + [ICMP4_ERROR_ECHO_REPLIES_SENT] = "echo replies sent", + [ICMP4_ERROR_TTL_EXPIRE_RESP_SENT] = "TTL time exceeded response sent", + [ICMP4_ERROR_TTL_EXPIRE_RESP_DROP] = "TTL time exceeded response dropped", +}; + +typedef enum { + ICMP_INPUT_NEXT_ERROR, + ICMP_INPUT_N_NEXT, +} icmp_input_next_t; + +typedef struct { + uword * type_and_code_by_name; + + uword * type_by_name; + + /* Vector dispatch table indexed by [icmp type]. 
*/ + u8 ip4_input_next_index_by_type[256]; +} icmp4_main_t; + +icmp4_main_t icmp4_main; + +static uword +ip4_icmp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + icmp4_main_t * im = &icmp4_main; + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next; + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + icmp46_header_t * icmp0; + icmp4_type_t type0; + u32 bi0, next0; + + if (PREDICT_TRUE (n_left_from > 2)) + { + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + p0 = vlib_get_buffer (vm, from[1]); + ip0 = vlib_buffer_get_current (p0); + CLIB_PREFETCH(ip0, CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + icmp0 = ip4_next_header (ip0); + type0 = icmp0->type; + next0 = im->ip4_input_next_index_by_type[type0]; + + p0->error = node->errors[ICMP4_ERROR_UNKNOWN_TYPE]; + if (PREDICT_FALSE (next0 != next)) + { + vlib_put_next_frame (vm, node, next, n_left_to_next + 1); + next = next0; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip4_icmp_input_node,static) = { + .function = ip4_icmp_input, + .name = "ip4-icmp-input", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp_input_trace, + + .n_errors = ARRAY_LEN (icmp_error_strings), + .error_strings = icmp_error_strings, + + .n_next_nodes = 1, + .next_nodes = { + [ICMP_INPUT_NEXT_ERROR] = "error-punt", + }, +}; + +static uword +ip4_icmp_echo_request (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next; + ip4_main_t * i4m = &ip4_main; + u16 * fragment_ids, * fid; + u8 host_config_ttl = i4m->host_config.ttl; + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp_input_trace_t)); + + /* Get random fragment IDs for replies. 
*/ + fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer, + n_packets * sizeof (fragment_ids[0])); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_left_from > 2 && n_left_to_next > 2) + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + icmp46_header_t * icmp0, * icmp1; + u32 bi0, src0, dst0; + u32 bi1, src1, dst1; + ip_csum_t sum0, sum1; + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, bi0); + p1 = vlib_get_buffer (vm, bi1); + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + icmp0 = ip4_next_header (ip0); + icmp1 = ip4_next_header (ip1); + + vnet_buffer (p0)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index; + vnet_buffer (p1)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index; + + /* Update ICMP checksum. */ + sum0 = icmp0->checksum; + sum1 = icmp1->checksum; + + ASSERT (icmp0->type == ICMP4_echo_request); + ASSERT (icmp1->type == ICMP4_echo_request); + sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply, + icmp46_header_t, type); + sum1 = ip_csum_update (sum1, ICMP4_echo_request, ICMP4_echo_reply, + icmp46_header_t, type); + icmp0->type = ICMP4_echo_reply; + icmp1->type = ICMP4_echo_reply; + + icmp0->checksum = ip_csum_fold (sum0); + icmp1->checksum = ip_csum_fold (sum1); + + src0 = ip0->src_address.data_u32; + src1 = ip1->src_address.data_u32; + dst0 = ip0->dst_address.data_u32; + dst1 = ip1->dst_address.data_u32; + + /* Swap source and destination address. + Does not change checksum. */ + ip0->src_address.data_u32 = dst0; + ip1->src_address.data_u32 = dst1; + ip0->dst_address.data_u32 = src0; + ip1->dst_address.data_u32 = src1; + + /* Update IP checksum. */ + sum0 = ip0->checksum; + sum1 = ip1->checksum; + + sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl, + ip4_header_t, ttl); + sum1 = ip_csum_update (sum1, ip1->ttl, host_config_ttl, + ip4_header_t, ttl); + ip0->ttl = host_config_ttl; + ip1->ttl = host_config_ttl; + + /* New fragment id. */ + sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0], + ip4_header_t, fragment_id); + sum1 = ip_csum_update (sum1, ip1->fragment_id, fid[1], + ip4_header_t, fragment_id); + ip0->fragment_id = fid[0]; + ip1->fragment_id = fid[1]; + fid += 2; + + ip0->checksum = ip_csum_fold (sum0); + ip1->checksum = ip_csum_fold (sum1); + + ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + ASSERT (ip1->checksum == ip4_header_checksum (ip1)); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + icmp46_header_t * icmp0; + u32 bi0, src0, dst0; + ip_csum_t sum0; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + icmp0 = ip4_next_header (ip0); + + vnet_buffer (p0)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index; + + /* Update ICMP checksum. */ + sum0 = icmp0->checksum; + + ASSERT (icmp0->type == ICMP4_echo_request); + sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply, + icmp46_header_t, type); + icmp0->type = ICMP4_echo_reply; + icmp0->checksum = ip_csum_fold (sum0); + + src0 = ip0->src_address.data_u32; + dst0 = ip0->dst_address.data_u32; + ip0->src_address.data_u32 = dst0; + ip0->dst_address.data_u32 = src0; + + /* Update IP checksum. 
*/ + sum0 = ip0->checksum; + + sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl, + ip4_header_t, ttl); + ip0->ttl = host_config_ttl; + + sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0], + ip4_header_t, fragment_id); + ip0->fragment_id = fid[0]; + fid += 1; + + ip0->checksum = ip_csum_fold (sum0); + + ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + vlib_error_count (vm, ip4_icmp_input_node.index, + ICMP4_ERROR_ECHO_REPLIES_SENT, + frame->n_vectors); + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = { + .function = ip4_icmp_echo_request, + .name = "ip4-icmp-echo-request", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp_input_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "ip4-rewrite-local", + }, +}; + +typedef enum { + ICMP4_TTL_EXPIRE_NEXT_DROP, + ICMP4_TTL_EXPIRE_NEXT_LOOKUP, + ICMP4_TTL_EXPIRE_N_NEXT, +} icmp_ttl_expire_next_t; + +static uword +ip4_icmp_ttl_expire (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from, * to_next; + uword n_left_from, n_left_to_next; + icmp_ttl_expire_next_t next_index; + ip4_main_t *im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, sizeof (icmp_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0 = from[0]; + u32 next0 = ICMP4_TTL_EXPIRE_NEXT_LOOKUP; + u8 error0 = ICMP4_ERROR_TTL_EXPIRE_RESP_SENT; + u32 len0, new_len0; + vlib_buffer_t * p0; + ip4_header_t * ip0, * out_ip0; + icmp46_header_t * icmp0; + ip_csum_t sum; + u32 sw_if_index0, if_add_index0; + + /* Speculatively enqueue p0 to the current next frame */ + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip0 = vlib_buffer_get_current(p0); + len0 = vlib_buffer_length_in_chain (vm, p0); + sw_if_index0 = vnet_buffer(p0)->sw_if_index[VLIB_RX]; + + /* Cut payload to just IP header plus first 8 bytes */ + new_len0 = (ip0->ip_version_and_header_length &0xf)*4 + 8; + if (len0 > new_len0) + { + p0->current_length = new_len0; /* should fit in 1st buffer */ + if (PREDICT_FALSE(p0->total_length_not_including_first_buffer)) + { /* clear current_length of all other buffers in chain */ + vlib_buffer_t *b = p0; + p0->total_length_not_including_first_buffer = 0; + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, b->next_buffer); + b->current_length = 0; + } + } + } + + /* Add IP header and ICMP header including a 4 byte unused field */ + vlib_buffer_advance(p0, + -sizeof(ip4_header_t)-sizeof(icmp46_header_t)-4); + out_ip0 = vlib_buffer_get_current(p0); + icmp0 = (icmp46_header_t *) &out_ip0[1]; + + /* Fill ip header fields */ + out_ip0->ip_version_and_header_length = 0x45; + out_ip0->tos = 0; + out_ip0->length = clib_host_to_net_u16(p0->current_length); + out_ip0->fragment_id = 0; + out_ip0->ttl = 0xff; + out_ip0->protocol = IP_PROTOCOL_ICMP; + out_ip0->dst_address = ip0->src_address; + if_add_index0 = + lm->if_address_pool_index_by_sw_if_index[sw_if_index0]; + if (PREDICT_TRUE(if_add_index0 != ~0)) + { + ip_interface_address_t *if_add = + 
pool_elt_at_index(lm->if_address_pool, if_add_index0); + ip4_address_t *if_ip = + ip_interface_address_get_address(lm, if_add); + out_ip0->src_address = *if_ip; + vlib_error_count (vm, node->node_index, error0, 1); + } + else /* interface has no IP4 address - should not happen */ + { + next0 = ICMP4_TTL_EXPIRE_NEXT_DROP; + error0 = ICMP4_ERROR_TTL_EXPIRE_RESP_DROP; + } + out_ip0->checksum = ip4_header_checksum(out_ip0); + + /* Fill icmp header fields */ + icmp0->type = ICMP4_time_exceeded; + icmp0->code = ICMP4_time_exceeded_ttl_exceeded_in_transit; + icmp0->checksum = 0; + sum = ip_incremental_checksum( + 0, icmp0, p0->current_length - sizeof(ip4_header_t)); + icmp0->checksum = ~ip_csum_fold(sum); + + /* Update error status */ + p0->error = node->errors[error0]; + + /* Verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip4_icmp_ttl_expire_node) = { + .function = ip4_icmp_ttl_expire, + .name = "ip4-icmp-ttl-expire", + .vector_size = sizeof (u32), + + .n_errors = ARRAY_LEN (icmp_error_strings), + .error_strings = icmp_error_strings, + + .n_next_nodes = ICMP4_TTL_EXPIRE_N_NEXT, + .next_nodes = { + [ICMP4_TTL_EXPIRE_NEXT_DROP] = "error-drop", + [ICMP4_TTL_EXPIRE_NEXT_LOOKUP] = "ip4-lookup", + }, + + .format_trace = format_icmp_input_trace, +}; + + +static uword unformat_icmp_type_and_code (unformat_input_t * input, va_list * args) +{ + icmp46_header_t * h = va_arg (*args, icmp46_header_t *); + icmp4_main_t * cm = &icmp4_main; + u32 i; + + if (unformat_user (input, unformat_vlib_number_by_name, + cm->type_and_code_by_name, &i)) + { + h->type = (i >> 8) & 0xff; + h->code = (i >> 0) & 0xff; + } + else if (unformat_user (input, unformat_vlib_number_by_name, + cm->type_by_name, &i)) + { + h->type = i; + h->code = 0; + } + else + return 0; + + return 1; +} + +static void +icmp4_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = pg->vlib_main; + u32 ip_offset, icmp_offset; + + icmp_offset = g->start_byte_offset; + ip_offset = (g-1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + icmp46_header_t * icmp0; + u32 len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ASSERT (p0->current_data == 0); + ip0 = (void *) (p0->data + ip_offset); + icmp0 = (void *) (p0->data + icmp_offset); + len0 = clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); + icmp0->checksum = ~ ip_csum_fold (ip_incremental_checksum (0, icmp0, len0)); + } +} + +typedef struct { + pg_edit_t type, code; + pg_edit_t checksum; +} pg_icmp46_header_t; + +always_inline void +pg_icmp_header_init (pg_icmp46_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, icmp46_header_t, f); + _ (type); + _ (code); + _ (checksum); +#undef _ +} + +static uword +unformat_pg_icmp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_icmp46_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t), + &group_index); + pg_icmp_header_init (p); + + p->checksum.type = PG_EDIT_UNSPECIFIED; + + { + icmp46_header_t tmp; + + if (! 
unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp)) + goto error; + + pg_edit_set_fixed (&p->type, tmp.type); + pg_edit_set_fixed (&p->code, tmp.code); + } + + /* Parse options. */ + while (1) + { + if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + /* Can't parse input: try next protocol level. */ + else + break; + } + + if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = icmp4_pg_edit_function; + g->edit_function_opaque = 0; + } + + return 1; + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + +void ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, + u32 node_index) +{ + icmp4_main_t * im = &icmp4_main; + + ASSERT (type < ARRAY_LEN (im->ip4_input_next_index_by_type)); + im->ip4_input_next_index_by_type[type] + = vlib_node_add_next (vm, ip4_icmp_input_node.index, node_index); +} + +static clib_error_t * +icmp4_init (vlib_main_t * vm) +{ + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi; + icmp4_main_t * cm = &icmp4_main; + clib_error_t * error; + + error = vlib_call_init_function (vm, ip_main_init); + + if (error) + return error; + + pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP); + pi->format_header = format_ip4_icmp_header; + pi->unformat_pg_edit = unformat_pg_icmp_header; + + cm->type_by_name = hash_create_string (0, sizeof (uword)); +#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n)); + foreach_icmp4_type; +#undef _ + + cm->type_and_code_by_name = hash_create_string (0, sizeof (uword)); +#define _(a,n,t) hash_set_mem (cm->type_by_name, #t, (n) | (ICMP4_##a << 8)); + foreach_icmp4_code; +#undef _ + + memset (cm->ip4_input_next_index_by_type, + ICMP_INPUT_NEXT_ERROR, + sizeof (cm->ip4_input_next_index_by_type)); + + ip4_icmp_register_type (vm, ICMP4_echo_request, ip4_icmp_echo_request_node.index); + + return 0; +} + +VLIB_INIT_FUNCTION (icmp4_init); diff --git a/vnet/vnet/ip/icmp46_packet.h b/vnet/vnet/ip/icmp46_packet.h new file mode 100644 index 00000000000..fa3fed4d081 --- /dev/null +++ b/vnet/vnet/ip/icmp46_packet.h @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * icmp46_packet.h: ip4/ip6 icmp packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_icmp46_packet_h +#define included_vnet_icmp46_packet_h + +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip6_packet.h> + +#define foreach_icmp4_type \ + _ (0, echo_reply) \ + _ (3, destination_unreachable) \ + _ (4, source_quench) \ + _ (5, redirect) \ + _ (6, alternate_host_address) \ + _ (8, echo_request) \ + _ (9, router_advertisement) \ + _ (10, router_solicitation) \ + _ (11, time_exceeded) \ + _ (12, parameter_problem) \ + _ (13, timestamp_request) \ + _ (14, timestamp_reply) \ + _ (15, information_request) \ + _ (16, information_reply) \ + _ (17, address_mask_request) \ + _ (18, address_mask_reply) \ + _ (30, traceroute) \ + _ (31, datagram_conversion_error) \ + _ (32, mobile_host_redirect) \ + _ (33, ip6_where_are_you) \ + _ (34, ip6_i_am_here) \ + _ (35, mobile_registration_request) \ + _ (36, mobile_registration_reply) \ + _ (37, domain_name_request) \ + _ (38, domain_name_reply) \ + _ (39, skip) \ + _ (40, photuris) + +#define icmp_no_code 0 + +#define foreach_icmp4_code \ + _ (destination_unreachable, 0, destination_unreachable_net) \ + _ (destination_unreachable, 1, destination_unreachable_host) \ + _ (destination_unreachable, 2, protocol_unreachable) \ + _ (destination_unreachable, 3, port_unreachable) \ + _ (destination_unreachable, 4, fragmentation_needed_and_dont_fragment_set) \ + _ (destination_unreachable, 5, source_route_failed) \ + _ (destination_unreachable, 6, destination_network_unknown) \ + _ (destination_unreachable, 7, destination_host_unknown) \ + _ (destination_unreachable, 8, source_host_isolated) \ + _ (destination_unreachable, 9, network_administratively_prohibited) \ + _ (destination_unreachable, 10, host_administratively_prohibited) \ + _ (destination_unreachable, 11, network_unreachable_for_type_of_service) \ + _ (destination_unreachable, 12, host_unreachable_for_type_of_service) \ + _ (destination_unreachable, 13, communication_administratively_prohibited) \ + _ (destination_unreachable, 14, host_precedence_violation) \ + _ (destination_unreachable, 15, precedence_cutoff_in_effect) \ + _ (redirect, 0, network_redirect) \ + _ (redirect, 1, host_redirect) \ + _ (redirect, 2, type_of_service_and_network_redirect) \ + _ (redirect, 3, type_of_service_and_host_redirect) \ + _ (router_advertisement, 0, normal_router_advertisement) \ + _ 
(router_advertisement, 16, does_not_route_common_traffic) \ + _ (time_exceeded, 0, ttl_exceeded_in_transit) \ + _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \ + _ (parameter_problem, 0, pointer_indicates_error) \ + _ (parameter_problem, 1, missing_required_option) \ + _ (parameter_problem, 2, bad_length) + +/* ICMPv6 */ +#define foreach_icmp6_type \ + _ (1, destination_unreachable) \ + _ (2, packet_too_big) \ + _ (3, time_exceeded) \ + _ (4, parameter_problem) \ + _ (128, echo_request) \ + _ (129, echo_reply) \ + _ (130, multicast_listener_request) \ + _ (131, multicast_listener_report) \ + _ (132, multicast_listener_done) \ + _ (133, router_solicitation) \ + _ (134, router_advertisement) \ + _ (135, neighbor_solicitation) \ + _ (136, neighbor_advertisement) \ + _ (137, redirect) \ + _ (138, router_renumbering) \ + _ (139, node_information_request) \ + _ (140, node_information_response) \ + _ (141, inverse_neighbor_solicitation) \ + _ (142, inverse_neighbor_advertisement) \ + _ (143, multicast_listener_report_v2) \ + _ (144, home_agent_address_discovery_request) \ + _ (145, home_agent_address_discovery_reply) \ + _ (146, mobile_prefix_solicitation) \ + _ (147, mobile_prefix_advertisement) \ + _ (148, certification_path_solicitation) \ + _ (149, certification_path_advertisement) \ + _ (151, multicast_router_advertisement) \ + _ (152, multicast_router_solicitation) \ + _ (153, multicast_router_termination) \ + _ (154, fmipv6_messages) + +#define foreach_icmp6_code \ + _ (destination_unreachable, 0, no_route_to_destination) \ + _ (destination_unreachable, 1, destination_administratively_prohibited) \ + _ (destination_unreachable, 2, beyond_scope_of_source_address) \ + _ (destination_unreachable, 3, address_unreachable) \ + _ (destination_unreachable, 4, port_unreachable) \ + _ (destination_unreachable, 5, source_address_failed_policy) \ + _ (destination_unreachable, 6, reject_route_to_destination) \ + _ (time_exceeded, 0, ttl_exceeded_in_transit) \ + _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \ + _ (parameter_problem, 0, erroneous_header_field) \ + _ (parameter_problem, 1, unrecognized_next_header) \ + _ (parameter_problem, 2, unrecognized_option) \ + _ (router_renumbering, 0, command) \ + _ (router_renumbering, 1, result) \ + _ (node_information_request, 0, data_contains_ip6_address) \ + _ (node_information_request, 1, data_contains_name) \ + _ (node_information_request, 2, data_contains_ip4_address) \ + _ (node_information_response, 0, success) \ + _ (node_information_response, 1, failed) \ + _ (node_information_response, 2, unknown_request) + +typedef enum { +#define _(n,f) ICMP4_##f = n, + foreach_icmp4_type +#undef _ +} icmp4_type_t; + +typedef enum { +#define _(t,n,f) ICMP4_##t##_##f = n, + foreach_icmp4_code +#undef _ +} icmp4_code_t; + +typedef enum { +#define _(n,f) ICMP6_##f = n, + foreach_icmp6_type +#undef _ +} icmp6_type_t; + +typedef enum { +#define _(t,n,f) ICMP6_##t##_##f = n, + foreach_icmp6_code +#undef _ +} icmp6_code_t; + +typedef CLIB_PACKED (struct { + u8 type; + + u8 code; + + /* IP checksum of icmp header plus data which follows. 
*/ + u16 checksum; +}) icmp46_header_t; + +/* ip6 neighbor discovery */ +#define foreach_icmp6_neighbor_discovery_option \ + _ (1, source_link_layer_address) \ + _ (2, target_link_layer_address) \ + _ (3, prefix_information) \ + _ (4, redirected_header) \ + _ (5, mtu) \ + _ (6, nbma_shortcut_limit) \ + _ (7, advertisement_interval) \ + _ (8, home_agent_information) \ + _ (9, source_address_list) \ + _ (10, target_address_list) \ + _ (11, cryptographically_generated_address) \ + _ (12, rsa_signature) \ + _ (13, timestamp) \ + _ (14, nonce) \ + _ (15, trust_anchor) \ + _ (16, certificate) \ + _ (17, ip_address_and_prefix) \ + _ (18, new_router_prefix_information) \ + _ (19, mobile_link_layer_address) \ + _ (20, neighbor_advertisement_acknowledgment) \ + _ (23, map) \ + _ (24, route_information) \ + _ (25, recursive_dns_server) \ + _ (26, ra_flags_extension) \ + _ (27, handover_key_request) \ + _ (28, handover_key_reply) \ + _ (29, handover_assist_information) \ + _ (30, mobile_node_identifier) \ + _ (31, dns_search_list) \ + _ (138, card_request) \ + _ (139, card_reply) + +typedef enum icmp6_neighbor_discovery_option_type { +#define _(n,f) ICMP6_NEIGHBOR_DISCOVERY_OPTION_##f = n, + foreach_icmp6_neighbor_discovery_option +#undef _ +} icmp6_neighbor_discovery_option_type_t; + +typedef CLIB_PACKED (struct { + /* Option type. */ + u8 type; + + /* Length of this header plus option data in 8 byte units. */ + u8 n_data_u64s; + + /* Option data follows. */ + u8 data[0]; +}) icmp6_neighbor_discovery_option_header_t; + +typedef CLIB_PACKED (struct { + icmp6_neighbor_discovery_option_header_t header; + u8 dst_address_length; + u8 flags; +#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK (1 << 7) +#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO (1 << 6) + u32 valid_time; + u32 preferred_time; + u32 unused; + ip6_address_t dst_address; +}) icmp6_neighbor_discovery_prefix_information_option_t; + +typedef CLIB_PACKED (struct { + u8 type; + u8 aux_data_len_u32s; + u16 num_sources; + ip6_address_t mcast_addr; + ip6_address_t source_addr[0]; +}) icmp6_multicast_address_record_t; + +typedef CLIB_PACKED (struct { + ip6_hop_by_hop_ext_t ext_hdr; + ip6_router_alert_option_t alert; + ip6_padN_option_t pad; + icmp46_header_t icmp; + u16 rsvd; + u16 num_addr_records; + icmp6_multicast_address_record_t records[0]; +}) icmp6_multicast_listener_report_header_t; + +typedef CLIB_PACKED (struct { + icmp6_neighbor_discovery_option_header_t header; + u8 reserved[6]; + /* IP6 header plus payload follows. */ + u8 data[0]; +}) icmp6_neighbor_discovery_redirected_header_option_t; + +typedef CLIB_PACKED (struct { + icmp6_neighbor_discovery_option_header_t header; + u16 unused; + u32 mtu; +}) icmp6_neighbor_discovery_mtu_option_t; + +typedef CLIB_PACKED (struct { + icmp6_neighbor_discovery_option_header_t header; + u8 ethernet_address[6]; +}) icmp6_neighbor_discovery_ethernet_link_layer_address_option_t; + +typedef CLIB_PACKED (struct { + icmp6_neighbor_discovery_option_header_t header; + u8 max_l2_address[6+8]; +}) icmp6_neighbor_discovery_max_link_layer_address_option_t; + +/* Generic neighbor discover header. Used for router solicitations, + etc. */ +typedef CLIB_PACKED (struct { + icmp46_header_t icmp; + + u32 reserved_must_be_zero; +}) icmp6_neighbor_discovery_header_t; + +/* Router advertisement packet formats. */ +typedef CLIB_PACKED (struct { + icmp46_header_t icmp; + + /* Current hop limit to use for outgoing packets. 
*/ + u8 current_hop_limit; + + u8 flags; +#define ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP (1 << 7) +#define ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP (1 << 6) + + /* Zero means unspecified. */ + u16 router_lifetime_in_sec; + + /* Zero means unspecified. */ + u32 neighbor_reachable_time_in_msec; + + /* Zero means unspecified. */ + u32 time_in_msec_between_retransmitted_neighbor_solicitations; + + /* Options that may follow: source_link_layer_address, mtu, prefix_information. */ +}) icmp6_router_advertisement_header_t; + +/* Neighbor solicitation/advertisement header. */ +typedef CLIB_PACKED (struct { + icmp46_header_t icmp; + + /* Zero for solicitation; flags for advertisement. */ + u32 advertisement_flags; + /* Set when sent by a router. */ +#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER (1 << 31) + /* Set when response to solicitation. */ +#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED (1 << 30) +#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE (1 << 29) + + ip6_address_t target_address; + + /* Options that may follow: source_link_layer_address + (for solicitation) target_link_layer_address (for advertisement). */ +}) icmp6_neighbor_solicitation_or_advertisement_header_t; + +typedef CLIB_PACKED (struct { + icmp46_header_t icmp; + + u32 reserved_must_be_zero; + + /* Better next hop to use for given destination. */ + ip6_address_t better_next_hop_address; + + ip6_address_t dst_address; + + /* Options that may follow: target_link_layer_address, + redirected_header. */ +}) icmp6_redirect_header_t; + +/* Solicitation/advertisement packet format for ethernet. */ +typedef CLIB_PACKED (struct { + ip6_header_t ip; + + icmp6_neighbor_solicitation_or_advertisement_header_t neighbor; + + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t link_layer_option; +}) icmp6_neighbor_solicitation_header_t; + +/* Router solicitation packet format for ethernet. */ +typedef CLIB_PACKED (struct { + ip6_header_t ip; + icmp6_neighbor_discovery_header_t neighbor; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t link_layer_option; +}) icmp6_router_solicitation_header_t; + +/* router advertisement packet format for ethernet. */ +typedef CLIB_PACKED (struct { + ip6_header_t ip; + icmp6_router_advertisement_header_t router; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t link_layer_option; + icmp6_neighbor_discovery_mtu_option_t mtu_option; + icmp6_neighbor_discovery_prefix_information_option_t prefix[0]; +}) icmp6_router_advertisement_packet_t; + +/* multicast listener report packet format for ethernet. */ +typedef CLIB_PACKED (struct { + ip6_header_t ip; + icmp6_multicast_listener_report_header_t report_hdr; +}) icmp6_multicast_listener_report_packet_t; + +#endif /* included_vnet_icmp46_packet_h */ diff --git a/vnet/vnet/ip/icmp6.c b/vnet/vnet/ip/icmp6.c new file mode 100644 index 00000000000..2d265d2b5b2 --- /dev/null +++ b/vnet/vnet/ip/icmp6.c @@ -0,0 +1,814 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/icmp6.c: ip6 icmp + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +static u8 * format_ip6_icmp_type_and_code (u8 * s, va_list * args) +{ + icmp6_type_t type = va_arg (*args, int); + u8 code = va_arg (*args, int); + char * t = 0; + +#define _(n,f) case n: t = #f; break; + + switch (type) + { + foreach_icmp6_type; + + default: + break; + } + +#undef _ + + if (! t) + return format (s, "unknown 0x%x", type); + + s = format (s, "%s", t); + + t = 0; + switch ((type << 8) | code) + { +#define _(a,n,f) case (ICMP6_##a << 8) | (n): t = #f; break; + + foreach_icmp6_code; + +#undef _ + } + + if (t) + s = format (s, " %s", t); + + return s; +} + +static u8 * format_icmp6_header (u8 * s, va_list * args) +{ + icmp46_header_t * icmp = va_arg (*args, icmp46_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + + /* Nothing to do. */ + if (max_header_bytes < sizeof (icmp[0])) + return format (s, "ICMP header truncated"); + + s = format (s, "ICMP %U checksum 0x%x", + format_ip6_icmp_type_and_code, icmp->type, icmp->code, + clib_net_to_host_u16 (icmp->checksum)); + + if (max_header_bytes >= + sizeof(icmp6_neighbor_solicitation_or_advertisement_header_t) && + (icmp->type == ICMP6_neighbor_solicitation || + icmp->type == ICMP6_neighbor_advertisement)) + { + icmp6_neighbor_solicitation_or_advertisement_header_t *icmp6_nd = + (icmp6_neighbor_solicitation_or_advertisement_header_t *) icmp; + s = format (s, "\n target address %U", + format_ip6_address, &icmp6_nd->target_address); + } + + return s; +} + +u8 * format_icmp6_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + icmp6_input_trace_t * t = va_arg (*va, icmp6_input_trace_t *); + + s = format (s, "%U", + format_ip6_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + +static char * icmp_error_strings[] = { +#define _(f,s) s, + foreach_icmp6_error +#undef _ +}; + +typedef enum { + ICMP_INPUT_NEXT_DROP, + ICMP_INPUT_N_NEXT, +} icmp_input_next_t; + +typedef struct { + uword * type_and_code_by_name; + + uword * type_by_name; + + /* Vector dispatch table indexed by [icmp type]. 
*/ + u8 input_next_index_by_type[256]; + + /* Max valid code indexed by icmp type. */ + u8 max_valid_code_by_type[256]; + + /* hop_limit must be >= this value for this icmp type. */ + u8 min_valid_hop_limit_by_type[256]; + + u8 min_valid_length_by_type[256]; +} icmp6_main_t; + +icmp6_main_t icmp6_main; + +static uword +ip6_icmp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + icmp6_main_t * im = &icmp6_main; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + ip6_header_t * ip0; + icmp46_header_t * icmp0; + icmp6_type_t type0; + u32 bi0, next0, error0, len0; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + icmp0 = ip6_next_header (ip0); + type0 = icmp0->type; + + error0 = ICMP6_ERROR_NONE; + + next0 = im->input_next_index_by_type[type0]; + error0 = next0 == ICMP_INPUT_NEXT_DROP ? ICMP6_ERROR_UNKNOWN_TYPE : error0; + + /* Check code is valid for type. */ + error0 = icmp0->code > im->max_valid_code_by_type[type0] ? ICMP6_ERROR_INVALID_CODE_FOR_TYPE : error0; + + /* Checksum is already validated by ip6_local node so we don't need to check that. */ + + /* Check that hop limit == 255 for certain types. */ + error0 = ip0->hop_limit < im->min_valid_hop_limit_by_type[type0] ? ICMP6_ERROR_INVALID_HOP_LIMIT_FOR_TYPE : error0; + + len0 = clib_net_to_host_u16 (ip0->payload_length); + error0 = len0 < im->min_valid_length_by_type[type0] ? ICMP6_ERROR_LENGTH_TOO_SMALL_FOR_TYPE : error0; + + b0->error = node->errors[error0]; + + next0 = error0 != ICMP6_ERROR_NONE ? 
ICMP_INPUT_NEXT_DROP : next0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_icmp_input_node) = { + .function = ip6_icmp_input, + .name = "ip6-icmp-input", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_errors = ARRAY_LEN (icmp_error_strings), + .error_strings = icmp_error_strings, + + .n_next_nodes = 1, + .next_nodes = { + [ICMP_INPUT_NEXT_DROP] = "error-drop", + }, +}; + +typedef enum { + ICMP6_ECHO_REQUEST_NEXT_LOOKUP, + ICMP6_ECHO_REQUEST_NEXT_OUTPUT, + ICMP6_ECHO_REQUEST_N_NEXT, +} icmp6_echo_request_next_t; + +static uword +ip6_icmp_echo_request (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next_index; + ip6_main_t * im = &ip6_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 2 && n_left_to_next > 2) + { + vlib_buffer_t * p0, * p1; + ip6_header_t * ip0, * ip1; + icmp46_header_t * icmp0, * icmp1; + ip6_address_t tmp0, tmp1; + ip_csum_t sum0, sum1; + u32 bi0, bi1; + u32 fib_index0, fib_index1; + u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP; + u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP; + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, bi0); + p1 = vlib_get_buffer (vm, bi1); + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + icmp0 = ip6_next_header (ip0); + icmp1 = ip6_next_header (ip1); + + /* Check icmp type to echo reply and update icmp checksum. */ + sum0 = icmp0->checksum; + sum1 = icmp1->checksum; + + ASSERT (icmp0->type == ICMP6_echo_request); + ASSERT (icmp1->type == ICMP6_echo_request); + sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply, + icmp46_header_t, type); + sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply, + icmp46_header_t, type); + + icmp0->checksum = ip_csum_fold (sum0); + icmp1->checksum = ip_csum_fold (sum1); + + icmp0->type = ICMP6_echo_reply; + icmp1->type = ICMP6_echo_reply; + + /* Swap source and destination address. */ + tmp0 = ip0->src_address; + tmp1 = ip1->src_address; + + ip0->src_address = ip0->dst_address; + ip1->src_address = ip1->dst_address; + + ip0->dst_address = tmp0; + ip1->dst_address = tmp1; + + /* New hop count. 
*/ + ip0->hop_limit = im->host_config.ttl; + ip1->hop_limit = im->host_config.ttl; + + if (ip6_address_is_link_local_unicast (&ip0->dst_address)) + { + ethernet_header_t *eth0; + u8 tmp_mac[6]; + /* For link local, reuse current MAC header by sawpping + * SMAC to DMAC instead of IP6 lookup since link local + * is not in the IP6 FIB */ + vlib_buffer_reset (p0); + eth0 = vlib_buffer_get_current (p0); + memcpy (tmp_mac, eth0->dst_address, 6); + memcpy (eth0->dst_address, eth0->src_address, 6); + memcpy (eth0->src_address, tmp_mac, 6); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = + vnet_buffer (p0)->sw_if_index[VLIB_RX]; + next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT; + } + else + { + /* Determine the correct lookup fib indices... */ + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p0)->sw_if_index[VLIB_RX]); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0; + } + + if (ip6_address_is_link_local_unicast (&ip1->dst_address)) + { + ethernet_header_t *eth1; + u8 tmp_mac[6]; + /* For link local, reuse current MAC header by sawpping + * SMAC to DMAC instead of IP6 lookup since link local + * is not in the IP6 FIB */ + vlib_buffer_reset (p1); + eth1 = vlib_buffer_get_current (p1); + memcpy (tmp_mac, eth1->dst_address, 6); + memcpy (eth1->dst_address, eth1->src_address, 6); + memcpy (eth1->src_address, tmp_mac, 6); + vnet_buffer(p1)->sw_if_index[VLIB_TX] = + vnet_buffer (p1)->sw_if_index[VLIB_RX]; + next1 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT; + } + else + { + /* Determine the correct lookup fib indices... */ + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p1)->sw_if_index[VLIB_RX]); + vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1; + } + + vnet_buffer (p0)->sw_if_index[VLIB_RX] + = vnet_main.local_interface_sw_if_index; + vnet_buffer (p1)->sw_if_index[VLIB_RX] + = vnet_main.local_interface_sw_if_index; + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + icmp46_header_t * icmp0; + u32 bi0; + ip6_address_t tmp0; + ip_csum_t sum0; + u32 fib_index0; + u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + icmp0 = ip6_next_header (ip0); + + /* Check icmp type to echo reply and update icmp checksum. */ + sum0 = icmp0->checksum; + + ASSERT (icmp0->type == ICMP6_echo_request); + sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply, + icmp46_header_t, type); + + icmp0->checksum = ip_csum_fold (sum0); + + icmp0->type = ICMP6_echo_reply; + + /* Swap source and destination address. 
*/ + tmp0 = ip0->src_address; + ip0->src_address = ip0->dst_address; + ip0->dst_address = tmp0; + + ip0->hop_limit = im->host_config.ttl; + + if (ip6_address_is_link_local_unicast (&ip0->dst_address)) + { + ethernet_header_t *eth0; + u8 tmp_mac[6]; + /* For link local, reuse current MAC header by sawpping + * SMAC to DMAC instead of IP6 lookup since link local + * is not in the IP6 FIB */ + vlib_buffer_reset (p0); + eth0 = vlib_buffer_get_current (p0); + memcpy (tmp_mac, eth0->dst_address, 6); + memcpy (eth0->dst_address, eth0->src_address, 6); + memcpy (eth0->src_address, tmp_mac, 6); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = + vnet_buffer (p0)->sw_if_index[VLIB_RX]; + next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT; + } + else + { + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p0)->sw_if_index[VLIB_RX]); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0; + } + vnet_buffer (p0)->sw_if_index[VLIB_RX] + = vnet_main.local_interface_sw_if_index; + + /* Verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_error_count (vm, ip6_icmp_input_node.index, + ICMP6_ERROR_ECHO_REPLIES_SENT, + frame->n_vectors); + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = { + .function = ip6_icmp_echo_request, + .name = "ip6-icmp-echo-request", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT, + .next_nodes = { + [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup", + [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output", + }, +}; + +typedef enum { + ICMP6_TTL_EXPIRE_NEXT_DROP, + ICMP6_TTL_EXPIRE_NEXT_LOOKUP, + ICMP6_TTL_EXPIRE_N_NEXT, +} icmp_ttl_expire_next_t; + +static uword +ip6_icmp_ttl_expire (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from, * to_next; + uword n_left_from, n_left_to_next; + icmp_ttl_expire_next_t next_index; + ip6_main_t *im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, sizeof (icmp6_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0 = from[0]; + u32 next0 = ICMP6_TTL_EXPIRE_NEXT_LOOKUP; + u8 error0 = ICMP6_ERROR_TTL_EXPIRE_RESP_SENT; + vlib_buffer_t * p0; + ip6_header_t * ip0, * out_ip0; + icmp46_header_t * icmp0; + u32 sw_if_index0, if_add_index0; + int bogus_length; + + /* Speculatively enqueue p0 to the current next frame */ + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip0 = vlib_buffer_get_current(p0); + sw_if_index0 = vnet_buffer(p0)->sw_if_index[VLIB_RX]; + + /* RFC2463 says to keep as much of the original packet as possible + * within the MTU. 
We cheat "a little" here by keeping whatever fits + * in the first buffer, to be more efficient */ + if (PREDICT_FALSE(p0->total_length_not_including_first_buffer)) + { /* clear current_length of all other buffers in chain */ + vlib_buffer_t *b = p0; + p0->total_length_not_including_first_buffer = 0; + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, b->next_buffer); + b->current_length = 0; + } + } + + /* Add IP header and ICMPv6 header including a 4 byte ununsed field */ + vlib_buffer_advance(p0, + -sizeof(ip6_header_t)-sizeof(icmp46_header_t)-4); + out_ip0 = vlib_buffer_get_current(p0); + icmp0 = (icmp46_header_t *) &out_ip0[1]; + + /* Fill ip header fields */ + out_ip0->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32(0x6<<28); + out_ip0->payload_length = + clib_host_to_net_u16(p0->current_length - sizeof(ip6_header_t)); + out_ip0->protocol = IP_PROTOCOL_ICMP6; + out_ip0->hop_limit = 0xff; + out_ip0->dst_address = ip0->src_address; + if_add_index0 = + lm->if_address_pool_index_by_sw_if_index[sw_if_index0]; + if (PREDICT_TRUE(if_add_index0 != ~0)) + { + ip_interface_address_t *if_add = + pool_elt_at_index(lm->if_address_pool, if_add_index0); + ip6_address_t *if_ip = + ip_interface_address_get_address(lm, if_add); + out_ip0->src_address = *if_ip; + vlib_error_count (vm, node->node_index, error0, 1); + } + else /* interface has no IP6 address - should not happen */ + { + next0 = ICMP6_TTL_EXPIRE_NEXT_DROP; + error0 = ICMP6_ERROR_TTL_EXPIRE_RESP_DROP; + } + + /* Fill icmp header fields */ + icmp0->type = ICMP6_time_exceeded; + icmp0->code = ICMP6_time_exceeded_ttl_exceeded_in_transit; + icmp0->checksum = 0; + icmp0->checksum = ip6_tcp_udp_icmp_compute_checksum( + vm, p0, out_ip0, &bogus_length); + + /* Update error status */ + p0->error = node->errors[error0]; + + /* Verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_icmp_ttl_expire_node) = { + .function = ip6_icmp_ttl_expire, + .name = "ip6-icmp-ttl-expire", + .vector_size = sizeof (u32), + + .n_errors = ARRAY_LEN (icmp_error_strings), + .error_strings = icmp_error_strings, + + .n_next_nodes = ICMP6_TTL_EXPIRE_N_NEXT, + .next_nodes = { + [ICMP6_TTL_EXPIRE_NEXT_DROP] = "error-drop", + [ICMP6_TTL_EXPIRE_NEXT_LOOKUP] = "ip6-lookup", + }, + + .format_trace = format_icmp6_input_trace, +}; + + +static uword unformat_icmp_type_and_code (unformat_input_t * input, va_list * args) +{ + icmp46_header_t * h = va_arg (*args, icmp46_header_t *); + icmp6_main_t * cm = &icmp6_main; + u32 i; + + if (unformat_user (input, unformat_vlib_number_by_name, + cm->type_and_code_by_name, &i)) + { + h->type = (i >> 8) & 0xff; + h->code = (i >> 0) & 0xff; + } + else if (unformat_user (input, unformat_vlib_number_by_name, + cm->type_by_name, &i)) + { + h->type = i; + h->code = 0; + } + else + return 0; + + return 1; +} + +static void +icmp6_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = pg->vlib_main; + u32 ip_offset, icmp_offset; + int bogus_length; + + icmp_offset = g->start_byte_offset; + ip_offset = (g-1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + icmp46_header_t * icmp0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + 
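/* At this point the packet generator has already written the ICMP header and
   payload for this packet, so the checksum computed below covers the final
   buffer contents; ip6_tcp_udp_icmp_compute_checksum also folds in the IPv6
   pseudo-header. This edit function is only installed (see
   unformat_pg_icmp_header below) when no fixed checksum value was given. */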
+ ASSERT (p0->current_data == 0); + ip0 = (void *) (p0->data + ip_offset); + icmp0 = (void *) (p0->data + icmp_offset); + + icmp0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, + &bogus_length); + ASSERT (bogus_length == 0); + } +} + +typedef struct { + pg_edit_t type, code; + pg_edit_t checksum; +} pg_icmp46_header_t; + +always_inline void +pg_icmp_header_init (pg_icmp46_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, icmp46_header_t, f); + _ (type); + _ (code); + _ (checksum); +#undef _ +} + +static uword +unformat_pg_icmp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_icmp46_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t), + &group_index); + pg_icmp_header_init (p); + + p->checksum.type = PG_EDIT_UNSPECIFIED; + + { + icmp46_header_t tmp; + + if (! unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp)) + goto error; + + pg_edit_set_fixed (&p->type, tmp.type); + pg_edit_set_fixed (&p->code, tmp.code); + } + + /* Parse options. */ + while (1) + { + if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + /* Can't parse input: try next protocol level. */ + else + break; + } + + if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = icmp6_pg_edit_function; + g->edit_function_opaque = 0; + } + + return 1; + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + +void icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index) +{ + icmp6_main_t * im = &icmp6_main; + + ASSERT (type < ARRAY_LEN (im->input_next_index_by_type)); + im->input_next_index_by_type[type] + = vlib_node_add_next (vm, ip6_icmp_input_node.index, node_index); +} + +static clib_error_t * +icmp6_init (vlib_main_t * vm) +{ + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi; + icmp6_main_t * cm = &icmp6_main; + clib_error_t * error; + + error = vlib_call_init_function (vm, ip_main_init); + + if (error) + return error; + + pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP6); + pi->format_header = format_icmp6_header; + pi->unformat_pg_edit = unformat_pg_icmp_header; + + cm->type_by_name = hash_create_string (0, sizeof (uword)); +#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n)); + foreach_icmp6_type; +#undef _ + + cm->type_and_code_by_name = hash_create_string (0, sizeof (uword)); +#define _(a,n,t) hash_set_mem (cm->type_by_name, #t, (n) | (ICMP6_##a << 8)); + foreach_icmp6_code; +#undef _ + + memset (cm->input_next_index_by_type, + ICMP_INPUT_NEXT_DROP, + sizeof (cm->input_next_index_by_type)); + memset (cm->max_valid_code_by_type, 0, sizeof (cm->max_valid_code_by_type)); + +#define _(a,n,t) cm->max_valid_code_by_type[ICMP6_##a] = clib_max (cm->max_valid_code_by_type[ICMP6_##a], n); + foreach_icmp6_code; +#undef _ + + memset (cm->min_valid_hop_limit_by_type, 0, sizeof (cm->min_valid_hop_limit_by_type)); + cm->min_valid_hop_limit_by_type[ICMP6_router_solicitation] = 255; + cm->min_valid_hop_limit_by_type[ICMP6_router_advertisement] = 255; + cm->min_valid_hop_limit_by_type[ICMP6_neighbor_solicitation] = 255; + cm->min_valid_hop_limit_by_type[ICMP6_neighbor_advertisement] = 255; + cm->min_valid_hop_limit_by_type[ICMP6_redirect] = 255; + + memset 
(cm->min_valid_length_by_type, sizeof (icmp46_header_t), sizeof (cm->min_valid_length_by_type)); + cm->min_valid_length_by_type[ICMP6_router_solicitation] = sizeof (icmp6_neighbor_discovery_header_t); + cm->min_valid_length_by_type[ICMP6_router_advertisement] = sizeof (icmp6_router_advertisement_header_t); + cm->min_valid_length_by_type[ICMP6_neighbor_solicitation] + = sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t); + cm->min_valid_length_by_type[ICMP6_neighbor_advertisement] + = sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t); + cm->min_valid_length_by_type[ICMP6_redirect] = sizeof (icmp6_redirect_header_t); + + icmp6_register_type (vm, ICMP6_echo_request, ip6_icmp_echo_request_node.index); + + return vlib_call_init_function (vm, ip6_neighbor_init); +} + +VLIB_INIT_FUNCTION (icmp6_init); diff --git a/vnet/vnet/ip/icmp6.h b/vnet/vnet/ip/icmp6.h new file mode 100644 index 00000000000..92f6913a454 --- /dev/null +++ b/vnet/vnet/ip/icmp6.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_icmp6_h +#define included_vnet_icmp6_h + +#define foreach_icmp6_error \ + _ (NONE, "valid packets") \ + _ (UNKNOWN_TYPE, "unknown type") \ + _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \ + _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \ + _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \ + _ (OPTIONS_WITH_ODD_LENGTH, \ + "total option length not multiple of 8 bytes") \ + _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \ + _ (ECHO_REPLIES_SENT, "echo replies sent") \ + _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \ + "neighbor solicitations from source not on link") \ + _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \ + "neighbor solicitations for unknown targets") \ + _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \ + _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \ + _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \ + "router solicitations from source not on link") \ + _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \ + "neighbor discovery unsupported interface") \ + _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \ + "neighbor discovery not configured") \ + _ (ROUTER_SOLICITATION_DEST_UNKNOWN, \ + "router solicitations for unknown destination") \ + _ (ROUTER_SOLICITATION_SOURCE_UNKNOWN, \ + "router solicitations for unknown source") \ + _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \ + "router advertisement source not link local") \ + _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \ + _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \ + _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \ + _ (TTL_EXPIRE_RESP_SENT, "TTL time exceeded response sent") \ + _ (TTL_EXPIRE_RESP_DROP, "TTL time exceeded response dropped") + + +typedef enum { +#define _(f,s) ICMP6_ERROR_##f, + foreach_icmp6_error +#undef _ +} icmp6_error_t; + +typedef struct { + u8 packet_data[64]; +} icmp6_input_trace_t; + 
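The registration hook declared just below, icmp6_register_type(), is how other graph nodes claim an ICMP6 type: as the definition in icmp6.c above shows, it adds a next-node arc from "ip6-icmp-input" and records it in input_next_index_by_type, so matching packets are dispatched to the registered node instead of being counted as unknown-type drops. A minimal sketch of a hypothetical consumer follows; the node my_ping_reply_node and its init function are invented for illustration and are not part of this commit.

/* Hypothetical consumer: have ip6-icmp-input hand echo replies to a ping-client node. */
extern vlib_node_registration_t my_ping_reply_node;  /* assumed to be registered elsewhere */

static clib_error_t *
my_ping_client_init (vlib_main_t * vm)
{
  clib_error_t * error;

  /* Make sure the ICMP6 dispatch table exists before touching it. */
  error = vlib_call_init_function (vm, icmp6_init);
  if (error)
    return error;

  /* Adds the arc "ip6-icmp-input" -> "my-ping-reply" and fills
     input_next_index_by_type[ICMP6_echo_reply]. */
  icmp6_register_type (vm, ICMP6_echo_reply, my_ping_reply_node.index);
  return 0;
}

VLIB_INIT_FUNCTION (my_ping_client_init);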
+format_function_t format_icmp6_input_trace; +void icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index); + +extern vlib_node_registration_t ip6_icmp_input_node; + +#endif /* included_vnet_icmp6_h */ + + diff --git a/vnet/vnet/ip/igmp_packet.h b/vnet/vnet/ip/igmp_packet.h new file mode 100644 index 00000000000..00b1e0deeb7 --- /dev/null +++ b/vnet/vnet/ip/igmp_packet.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * igmp_packet.h: igmp packet format + * + * Copyright (c) 2011 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_igmp_packet_h +#define included_vnet_igmp_packet_h + +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> + +#define foreach_igmp_type \ + _ (0x11, membership_query) \ + _ (0x12, membership_report_v1) \ + _ (0x13, dvmrp) \ + _ (0x14, pim_v1) \ + _ (0x15, cisco_trace) \ + _ (0x16, membership_report_v2) \ + _ (0x17, leave_group_v2) \ + _ (0x1e, traceroute_response) \ + _ (0x1f, traceroute_request) \ + _ (0x22, membership_report_v3) \ + _ (0x30, router_advertisement) \ + _ (0x31, router_solicitation) \ + _ (0x32, router_termination) + +typedef enum { +#define _(n,f) IGMP_TYPE_##f = n, + foreach_igmp_type +#undef _ +} igmp_type_t; + +typedef struct { + igmp_type_t type : 8; + + u8 code; + + u16 checksum; +} igmp_header_t; + +typedef struct { + /* membership_query, version <= 2 reports. */ + igmp_header_t header; + + /* Multicast destination address. 
*/ + ip4_address_t dst; +} igmp_message_t; + +#define foreach_igmp_membership_group_v3_type \ + _ (1, mode_is_filter_include) \ + _ (2, mode_is_filter_exclude) \ + _ (3, change_to_filter_include) \ + _ (4, change_to_filter_exclude) \ + _ (5, allow_new_sources) \ + _ (6, block_old_sources) + +typedef enum { +#define _(n,f) IGMP_MEMBERSHIP_GROUP_##f = n, + foreach_igmp_membership_group_v3_type +#undef _ +} igmp_membership_group_v3_type_t; + +typedef struct { + igmp_membership_group_v3_type_t type : 8; + + /* Number of 32 bit words of aux data after source addresses. */ + u8 n_aux_u32s; + + /* Number of source addresses that follow. */ + u16 n_src_addresses; + + /* Destination multicast address. */ + ip4_address_t dst_address; + + ip4_address_t src_addresses[0]; +} igmp_membership_group_v3_t; + +always_inline igmp_membership_group_v3_t * +igmp_membership_group_v3_next (igmp_membership_group_v3_t * g) +{ + return ((void *) g + + g->n_src_addresses * sizeof (g->src_addresses[0]) + + g->n_aux_u32s * sizeof (u32)); +} + +typedef struct { + /* Type 0x22. */ + igmp_header_t header; + + u16 unused; + + /* Number of groups which follow. */ + u16 n_groups; + + igmp_membership_group_v3_t groups[0]; +} igmp_membership_report_v3_t; + +/* IP6 flavor of IGMP is called MLD which is embedded in ICMP6. */ +typedef struct { + /* Preceeded by ICMP v6 header. */ + u16 max_response_delay_in_milliseconds; + u16 reserved; + ip6_address_t dst; +} mld_header_t; + +#endif /* included_vnet_igmp_packet_h */ diff --git a/vnet/vnet/ip/ip.h b/vnet/vnet/ip/ip.h new file mode 100644 index 00000000000..e47512a960d --- /dev/null +++ b/vnet/vnet/ip/ip.h @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip.h: ip generic (4 or 6) main + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_ip_main_h +#define included_ip_main_h + +#include <vppinfra/hash.h> +#include <vppinfra/heap.h> /* adjacency heap */ + +#include <vnet/vnet.h> + +#include <vnet/ip/format.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/lookup.h> + +#include <vnet/ip/tcp_packet.h> +#include <vnet/ip/udp_packet.h> +#include <vnet/ip/icmp46_packet.h> + +#include <vnet/ip/ip4.h> +#include <vnet/ip/ip4_error.h> +#include <vnet/ip/ip4_packet.h> + +#include <vnet/ip/ip6.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/ip6_error.h> +#include <vnet/ip/icmp6.h> + +#include <vnet/ip/tcp.h> + +#if DPDK > 0 +#include <vnet/devices/dpdk/dpdk.h> +#endif + +#include <vnet/classify/vnet_classify.h> + +typedef union { + ip4_address_t ip4; + ip6_address_t ip6; +} ip46_address_t; + +/* Per protocol info. */ +typedef struct { + /* Protocol name (also used as hash key). */ + u8 * name; + + /* Protocol number. */ + ip_protocol_t protocol; + + /* Format function for this IP protocol. */ + format_function_t * format_header; + + /* Parser for header. */ + unformat_function_t * unformat_header; + + /* Parser for per-protocol matches. */ + unformat_function_t * unformat_match; + + /* Parser for packet generator edits for this protocol. */ + unformat_function_t * unformat_pg_edit; +} ip_protocol_info_t; + +/* Per TCP/UDP port info. */ +typedef struct { + /* Port name (used as hash key). */ + u8 * name; + + /* UDP/TCP port number in network byte order. */ + u16 port; + + /* Port specific format function. */ + format_function_t * format_header; + + /* Parser for packet generator edits for this protocol. */ + unformat_function_t * unformat_pg_edit; +} tcp_udp_port_info_t; + +typedef struct { + /* Per IP protocol info. */ + ip_protocol_info_t * protocol_infos; + + /* Protocol info index hashed by 8 bit IP protocol. */ + uword * protocol_info_by_protocol; + + /* Hash table mapping IP protocol name (see protocols.def) + to protocol number. */ + uword * protocol_info_by_name; + + /* Per TCP/UDP port info. */ + tcp_udp_port_info_t * port_infos; + + /* Hash table from network-byte-order port to port info index. */ + uword * port_info_by_port; + + /* Hash table mapping TCP/UDP name to port info index. */ + uword * port_info_by_name; +} ip_main_t; + +extern ip_main_t ip_main; + +clib_error_t * +ip_main_init (vlib_main_t * vm); + +static inline ip_protocol_info_t * +ip_get_protocol_info (ip_main_t * im, u32 protocol) +{ + uword * p; + + p = hash_get (im->protocol_info_by_protocol, protocol); + return p ? vec_elt_at_index (im->protocol_infos, p[0]) : 0; +} + +static inline tcp_udp_port_info_t * +ip_get_tcp_udp_port_info (ip_main_t * im, u32 port) +{ + uword * p; + + p = hash_get (im->port_info_by_port, port); + return p ? 
vec_elt_at_index (im->port_infos, p[0]) : 0; +} + +always_inline ip_csum_t +ip_incremental_checksum_buffer (vlib_main_t * vm, vlib_buffer_t * first_buffer, + u32 first_buffer_offset, + u32 n_bytes_to_checksum, + ip_csum_t sum) +#if DPDK > 0 +{ + u32 n_bytes_left = n_bytes_to_checksum; + struct rte_mbuf * mb = ((struct rte_mbuf *)first_buffer)-1; + u8 nb_segs = mb->nb_segs; + ASSERT(mb->data_len >= first_buffer_offset); + void * h; + u32 n; + + n = clib_min (n_bytes_left, mb->data_len); + h = vlib_buffer_get_current (first_buffer) + first_buffer_offset; + while (n_bytes_left) + { + sum = ip_incremental_checksum (sum, h, n); + n_bytes_left -= n; + nb_segs--; + mb = mb->next; + if ((nb_segs == 0) || (mb == 0)) + break; + + n = clib_min (n_bytes_left, mb->data_len); + h = rte_ctrlmbuf_data(mb); + } + + ASSERT(n_bytes_left == 0); + ASSERT(nb_segs == 0); + return sum; +} +#else +{ + vlib_buffer_t * b = first_buffer; + u32 n_bytes_left = n_bytes_to_checksum; + ASSERT (b->current_length >= first_buffer_offset); + void * h; + u32 n; + + n = clib_min (n_bytes_left, b->current_length); + h = vlib_buffer_get_current (b) + first_buffer_offset; + sum = ip_incremental_checksum (sum, h, n); + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + while (1) + { + n_bytes_left -= n; + if (n_bytes_left == 0) + break; + b = vlib_get_buffer (vm, b->next_buffer); + n = clib_min (n_bytes_left, b->current_length); + h = vlib_buffer_get_current (b); + sum = ip_incremental_checksum (sum, h, n); + } + } + + return sum; +} +#endif /* DPDK */ + +void ip_del_all_interface_addresses (vlib_main_t *vm, u32 sw_if_index); + +#endif /* included_ip_main_h */ diff --git a/vnet/vnet/ip/ip4.h b/vnet/vnet/ip/ip4.h new file mode 100644 index 00000000000..6b8fd59a022 --- /dev/null +++ b/vnet/vnet/ip/ip4.h @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4.h: ip4 main include file + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_ip4_h +#define included_ip_ip4_h + +#include <vnet/ip/ip4_mtrie.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/lookup.h> + +typedef struct ip4_fib_t { + /* Hash table for each prefix length mapping. */ + uword * adj_index_by_dst_address[33]; + + /* Temporary vectors for holding new/old values for hash_set. */ + uword * new_hash_values, * old_hash_values; + + /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */ + ip4_fib_mtrie_t mtrie; + + /* Table ID (hash key) for this FIB. */ + u32 table_id; + + /* Index into FIB vector. */ + u32 index; + + /* flow hash configuration */ + u32 flow_hash_config; + + /* N-tuple classifier indices */ + u32 fwd_classify_table_index; + u32 rev_classify_table_index; + +} ip4_fib_t; + +struct ip4_main_t; + +typedef void (ip4_add_del_route_function_t) + (struct ip4_main_t * im, + uword opaque, + ip4_fib_t * fib, + u32 flags, + ip4_address_t * address, + u32 address_length, + void * old_result, + void * new_result); + +typedef struct { + ip4_add_del_route_function_t * function; + uword required_flags; + uword function_opaque; +} ip4_add_del_route_callback_t; + +typedef void (ip4_add_del_interface_address_function_t) + (struct ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_del); + +typedef struct { + ip4_add_del_interface_address_function_t * function; + uword function_opaque; +} ip4_add_del_interface_address_callback_t; + +typedef enum { + /* First check access list to either permit or deny this + packet based on classification. */ + IP4_RX_FEATURE_CHECK_ACCESS, + + /* RPF check: verify that source address is reachable via + RX interface or via any interface. */ + IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX, + IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY, + + /* IPSec */ + IP4_RX_FEATURE_IPSEC, + + /* vPath forwarding: won't return to call next feature + so any feature needed before vPath forwarding must be prior + to this entry */ + IP4_RX_FEATURE_VPATH, + + /* Must be last: perform forwarding lookup. */ + IP4_RX_FEATURE_LOOKUP, + + IP4_N_RX_FEATURE, +} ip4_rx_feature_type_t; + +typedef struct ip4_main_t { + ip_lookup_main_t lookup_main; + + /* Vector of FIBs. */ + ip4_fib_t * fibs; + + u32 fib_masks[33]; + + /* Table index indexed by software interface. */ + u32 * fib_index_by_sw_if_index; + + /* Hash table mapping table id to fib index. + ID space is not necessarily dense; index space is dense. */ + uword * fib_index_by_table_id; + + /* Vector of functions to call when routes are added/deleted. */ + ip4_add_del_route_callback_t * add_del_route_callbacks; + + /* Hash table mapping interface route rewrite adjacency index by sw if index. */ + uword * interface_route_adj_index_by_sw_if_index; + + /* Functions to call when interface address changes. */ + ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks; + + /* Template used to generate IP4 ARP packets. */ + vlib_packet_template_t ip4_arp_request_packet_template; + + /* Seed for Jenkins hash used to compute ip4 flow hash. */ + u32 flow_hash_seed; + + struct { + /* TTL to use for host generated packets. */ + u8 ttl; + + /* TOS byte to use for host generated packets. 
*/ + u8 tos; + + u8 pad[2]; + } host_config; +} ip4_main_t; + +/* Global ip4 main structure. */ +extern ip4_main_t ip4_main; + +/* Global ip4 input node. Errors get attached to ip4 input node. */ +extern vlib_node_registration_t ip4_input_node; +extern vlib_node_registration_t ip4_lookup_node; +extern vlib_node_registration_t ip4_rewrite_node; +extern vlib_node_registration_t ip4_arp_node; + +u32 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, ip4_address_t * dst, + u32 disable_default_route); + +always_inline u32 +ip4_fib_lookup_buffer (ip4_main_t * im, u32 fib_index, ip4_address_t * dst, + vlib_buffer_t * b) +{ + return ip4_fib_lookup_with_table (im, fib_index, dst, + /* disable_default_route */ 0); +} + +always_inline u32 +ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst) +{ + u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); + return ip4_fib_lookup_with_table (im, fib_index, dst, + /* disable_default_route */ 0); +} + +always_inline uword +ip4_destination_matches_route (ip4_main_t * im, + ip4_address_t * key, + ip4_address_t * dest, + uword dest_length) +{ return 0 == ((key->data_u32 ^ dest->data_u32) & im->fib_masks[dest_length]); } + +always_inline uword +ip4_destination_matches_interface (ip4_main_t * im, + ip4_address_t * key, + ip_interface_address_t * ia) +{ + ip4_address_t * a = ip_interface_address_get_address (&im->lookup_main, ia); + return ip4_destination_matches_route (im, key, a, ia->address_length); +} + +/* As above but allows for unaligned destinations (e.g. works right from IP header of packet). */ +always_inline uword +ip4_unaligned_destination_matches_route (ip4_main_t * im, + ip4_address_t * key, + ip4_address_t * dest, + uword dest_length) +{ return 0 == ((clib_mem_unaligned (&key->data_u32, u32) ^ dest->data_u32) & im->fib_masks[dest_length]); } + +always_inline void +ip4_src_address_for_packet (ip4_main_t * im, vlib_buffer_t * p, ip4_address_t * src, u32 sw_if_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = ip_interface_address_for_packet (lm, p, sw_if_index); + ip4_address_t * a = ip_interface_address_get_address (lm, ia); + *src = a[0]; +} + +/* Find interface address which matches destination. */ +always_inline ip4_address_t * +ip4_interface_address_matching_destination (ip4_main_t * im, ip4_address_t * dst, u32 sw_if_index, + ip_interface_address_t ** result_ia) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia; + ip4_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip4_address_t * a = ip_interface_address_get_address (lm, ia); + if (ip4_destination_matches_route (im, dst, a, ia->address_length)) + { + result = a; + break; + } + })); + if (result_ia) + *result_ia = result ? ia : 0; + return result; +} + +clib_error_t * +ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, + ip4_address_t * address, u32 address_length, + u32 is_del); + +int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2); + +/* Add/del a route to the FIB. */ + +#define IP4_ROUTE_FLAG_ADD (0 << 0) +#define IP4_ROUTE_FLAG_DEL (1 << 0) +#define IP4_ROUTE_FLAG_TABLE_ID (0 << 1) +#define IP4_ROUTE_FLAG_FIB_INDEX (1 << 1) +#define IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2) +#define IP4_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3) +/* Not last add/del in group. Facilities batching requests into packets. */ +#define IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4) +/* Dynamic route created via ARP reply. 
*/ +#define IP4_ROUTE_FLAG_NEIGHBOR (1 << 5) + +typedef struct { + /* IP4_ROUTE_FLAG_* */ + u32 flags; + + /* Either index of fib or table_id to hash and get fib. + IP4_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */ + u32 table_index_or_table_id; + + /* Destination address (prefix) and length. */ + ip4_address_t dst_address; + u32 dst_address_length; + + /* Adjacency to use for this destination. */ + u32 adj_index; + + /* If specified adjacencies to add and then + use for this destination. add_adj/n_add_adj + are override adj_index if specified. */ + ip_adjacency_t * add_adj; + u32 n_add_adj; +} ip4_add_del_route_args_t; + +ip4_fib_t * +find_ip4_fib_by_table_index_or_id (ip4_main_t * im, + u32 table_index_or_id, u32 flags); + +void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * args); + +void ip4_add_del_route_next_hop (ip4_main_t * im, + u32 flags, + ip4_address_t * dst_address, + u32 dst_address_length, + ip4_address_t * next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_weight, u32 adj_index, + u32 explicit_fib_index); + +void * +ip4_get_route (ip4_main_t * im, + u32 fib_index_or_table_id, + u32 flags, + u8 * address, + u32 address_length); + +void +ip4_foreach_matching_route (ip4_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip4_address_t * address, + u32 address_length, + ip4_address_t ** results, + u8 ** result_lengths); + +void ip4_delete_matching_routes (ip4_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip4_address_t * address, + u32 address_length); + +void ip4_maybe_remap_adjacencies (ip4_main_t * im, + u32 table_index_or_table_id, + u32 flags); + +void ip4_adjacency_set_interface_route (vnet_main_t * vnm, + ip_adjacency_t * adj, + u32 sw_if_index, + u32 if_address_index); + +/* Send an ARP request to see if given destination is reachable on given interface. */ +clib_error_t * +ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index); + +clib_error_t * +ip4_set_arp_limit (u32 arp_limit); + +uword +ip4_tcp_register_listener (vlib_main_t * vm, + u16 dst_port, + u32 next_node_index); +uword +ip4_udp_register_listener (vlib_main_t * vm, + u16 dst_port, + u32 next_node_index); + +void +ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, + u32 node_index); + +u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip4_header_t * ip0); + +void ip4_register_protocol (u32 protocol, u32 node_index); + +serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main; + +int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config); + +void ip4_mtrie_init (ip4_fib_mtrie_t * m); + +int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 table_index); + +/* Compute flow hash. We'll use it to select which adjacency to use for this + flow. And other things. */ +always_inline u32 +ip4_compute_flow_hash (ip4_header_t * ip, u32 flow_hash_config) +{ + tcp_header_t * tcp = (void *) (ip + 1); + u32 a, b, c, t1, t2; + uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP + || ip->protocol == IP_PROTOCOL_UDP); + + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) + ? ip->src_address.data_u32 : 0; + t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) + ? ip->dst_address.data_u32 : 0; + + a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1; + b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; + b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; + + t1 = is_tcp_udp ? tcp->ports.src : 0; + t2 = is_tcp_udp ? 
tcp->ports.dst : 0; + + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; + t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; + + c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? + (t1<<16) | t2 : (t2<<16) | t1; + + hash_v3_mix32 (a, b, c); + hash_v3_finalize32 (a, b, c); + + return c; +} + +#endif /* included_ip_ip4_h */ diff --git a/vnet/vnet/ip/ip46_cli.c b/vnet/vnet/ip/ip46_cli.c new file mode 100644 index 00000000000..44dde9bf3e7 --- /dev/null +++ b/vnet/vnet/ip/ip46_cli.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_cli.c: ip4 commands + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vnet/ip/ip.h> + +int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2) +{ return clib_net_to_host_u32 (a1->data_u32) - clib_net_to_host_u32 (a2->data_u32); } + +int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2) +{ + int i; + for (i = 0; i < ARRAY_LEN (a1->as_u16); i++) + { + int cmp = clib_net_to_host_u16 (a1->as_u16[i]) - clib_net_to_host_u16 (a2->as_u16[i]); + if (cmp != 0) + return cmp; + } + return 0; +} + +VLIB_CLI_COMMAND (set_interface_ip_command, static) = { + .path = "set interface ip", + .short_help = "IP4/IP6 commands", +}; + +void ip_del_all_interface_addresses (vlib_main_t *vm, u32 sw_if_index) +{ + ip4_main_t * im4 = &ip4_main; + ip4_address_t * ip4_addrs = 0; + u32 *ip4_masks = 0; + ip6_main_t * im6 = &ip6_main; + ip6_address_t * ip6_addrs = 0; + u32 *ip6_masks = 0; + ip_interface_address_t * ia; + int i; + + foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + ip4_address_t * x = (ip4_address_t *) + ip_interface_address_get_address (&im4->lookup_main, ia); + vec_add1 (ip4_addrs, x[0]); + vec_add1 (ip4_masks, ia->address_length); + })); + + foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + ip6_address_t * x = (ip6_address_t *) + ip_interface_address_get_address (&im6->lookup_main, ia); + vec_add1 (ip6_addrs, x[0]); + vec_add1 (ip6_masks, ia->address_length); + })); + + for (i = 0; i < vec_len (ip4_addrs); i++) + ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i], + ip4_masks[i], 1 /* is_del */); + for (i = 0; i < vec_len (ip6_addrs); i++) + ip6_add_del_interface_address (vm, sw_if_index, &ip6_addrs[i], + ip6_masks[i], 1 /* is_del */); + + vec_free (ip4_addrs); + vec_free (ip4_masks); + vec_free (ip6_addrs); + vec_free (ip6_masks); +} + +static clib_error_t * +add_del_ip_address (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t a4; + ip6_address_t a6; + clib_error_t * error = 0; + u32 sw_if_index, length, is_del; + + sw_if_index = ~0; + is_del = 0; + + if (unformat (input, "del")) + is_del = 1; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (is_del && unformat (input, "all")) + ip_del_all_interface_addresses (vm, sw_if_index); + else if (unformat (input, "%U/%d", unformat_ip4_address, &a4, &length)) + error = ip4_add_del_interface_address (vm, sw_if_index, &a4, length, + is_del); + else if (unformat (input, "%U/%d", unformat_ip6_address, &a6, &length)) + error = ip6_add_del_interface_address (vm, sw_if_index, &a6, length, + is_del); + else + { + error = clib_error_return (0, "expected IP4/IP6 address/length `%U'", + format_unformat_error, input); + goto done; + } + + + done: + return error; +} + +VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = { + .path = "set interface ip address", + .function = add_del_ip_address, + .short_help = "Add/delete IP4/IP6 address for interface", +}; + +/* Dummy init function to get us linked in. */ +static clib_error_t * ip4_cli_init (vlib_main_t * vm) +{ return 0; } + +VLIB_INIT_FUNCTION (ip4_cli_init); diff --git a/vnet/vnet/ip/ip4_error.h b/vnet/vnet/ip/ip4_error.h new file mode 100644 index 00000000000..b84b082b993 --- /dev/null +++ b/vnet/vnet/ip/ip4_error.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_error.h: ip4 fast path errors + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_ip4_error_h +#define included_ip_ip4_error_h + +#define foreach_ip4_error \ + /* Must be first. */ \ + _ (NONE, "valid ip4 packets") \ + \ + /* Errors signalled by ip4-input */ \ + _ (TOO_SHORT, "ip4 length < 20 bytes") \ + _ (BAD_LENGTH, "ip4 length > l2 length") \ + _ (BAD_CHECKSUM, "bad ip4 checksum") \ + _ (VERSION, "ip4 version != 4") \ + _ (OPTIONS, "ip4 options present") \ + _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \ + _ (TIME_EXPIRED, "ip4 ttl <= 1") \ + \ + /* Errors signalled by ip4-rewrite. */ \ + _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \ + _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \ + _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \ + _ (ADJACENCY_DROP, "ip4 adjacency drop") \ + _ (ADJACENCY_PUNT, "ip4 adjacency punt") \ + \ + /* Errors signalled by ip4-local. */ \ + _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \ + _ (TCP_CHECKSUM, "bad tcp checksum") \ + _ (UDP_CHECKSUM, "bad udp checksum") \ + _ (UDP_LENGTH, "inconsistent udp/ip lengths") \ + \ + /* Errors signalled by ip4-source-check. 
*/ \ + _ (UNICAST_SOURCE_CHECK_FAILS, "ip4 unicast source check fails") \ + \ + /* Spoofed packets in ip4-rewrite-local */ \ + _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \ + \ + /* Erros singalled by ip4-inacl */ \ + _ (INACL_TABLE_MISS, "input ACL table-miss drops") \ + _ (INACL_SESSION_DENY, "input ACL session deny drops") + +typedef enum { +#define _(sym,str) IP4_ERROR_##sym, + foreach_ip4_error +#undef _ + IP4_N_ERROR, +} ip4_error_t; + +#endif /* included_ip_ip4_error_h */ diff --git a/vnet/vnet/ip/ip4_format.c b/vnet/vnet/ip/ip4_format.c new file mode 100644 index 00000000000..5f4f8e3667d --- /dev/null +++ b/vnet/vnet/ip/ip4_format.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_format.c: ip4 formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +/* Format an IP4 address. */ +u8 * format_ip4_address (u8 * s, va_list * args) +{ + u8 * a = va_arg (*args, u8 *); + return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]); +} + +/* Format an IP4 route destination and length. */ +u8 * format_ip4_address_and_length (u8 * s, va_list * args) +{ + u8 * a = va_arg (*args, u8 *); + u8 l = va_arg (*args, u32); + return format (s, "%U/%d", format_ip4_address, a, l); +} + +/* Parse an IP4 address %d.%d.%d.%d. */ +uword unformat_ip4_address (unformat_input_t * input, va_list * args) +{ + u8 * result = va_arg (*args, u8 *); + unsigned a[4]; + + if (! unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3])) + return 0; + + if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256) + return 0; + + result[0] = a[0]; + result[1] = a[1]; + result[2] = a[2]; + result[3] = a[3]; + + return 1; +} + +/* Format an IP4 header. 
*/ +u8 * format_ip4_header (u8 * s, va_list * args) +{ + ip4_header_t * ip = va_arg (*args, ip4_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 ip_version, header_bytes; + uword indent; + + /* Nothing to do. */ + if (max_header_bytes < sizeof (ip[0])) + return format (s, "IP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + ip_version = (ip->ip_version_and_header_length >> 4); + header_bytes = (ip->ip_version_and_header_length & 0xf) * sizeof (u32); + + s = format (s, "%U: %U -> %U", + format_ip_protocol, ip->protocol, + format_ip4_address, ip->src_address.data, + format_ip4_address, ip->dst_address.data); + + /* Show IP version and header length only with unexpected values. */ + if (ip_version != 4 || header_bytes != sizeof (ip4_header_t)) + s = format (s, "\n%Uversion %d, header length %d", + format_white_space, indent, + ip_version, header_bytes); + + s = format (s, "\n%Utos 0x%02x, ttl %d, length %d, checksum 0x%04x", + format_white_space, indent, + ip->tos, ip->ttl, + clib_net_to_host_u16 (ip->length), + clib_net_to_host_u16 (ip->checksum)); + + /* Check and report invalid checksums. */ + { + u16 c = ip4_header_checksum (ip); + if (c != ip->checksum) + s = format (s, " (should be 0x%04x)", clib_net_to_host_u16 (c)); + } + + { + u32 f = clib_net_to_host_u16 (ip->flags_and_fragment_offset); + u32 o; + + s = format (s, "\n%Ufragment id 0x%04x", + format_white_space, indent, + clib_net_to_host_u16 (ip->fragment_id)); + + /* Fragment offset. */ + o = 8 * (f & 0x1fff); + f ^= o; + if (o != 0) + s = format (s, " offset %d", o); + + if (f != 0) + { + s = format (s, ", flags "); +#define _(l) if (f & IP4_HEADER_FLAG_##l) s = format (s, #l); + _ (MORE_FRAGMENTS); + _ (DONT_FRAGMENT); + _ (CONGESTION); +#undef _ + } + } + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi = ip_get_protocol_info (im, ip->protocol); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, + pi->format_header, + /* next protocol header */ (void*) ip + header_bytes, + max_header_bytes - header_bytes); + } + + return s; +} + +/* Parse an IP4 header. */ +uword unformat_ip4_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + ip4_header_t * ip; + int old_length; + + /* Allocate space for IP header. */ + { + void * p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip4_header_t)); + ip = p; + } + + memset (ip, 0, sizeof (ip[0])); + ip->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + + if (! unformat (input, "%U: %U -> %U", + unformat_ip_protocol, &ip->protocol, + unformat_ip4_address, &ip->src_address, + unformat_ip4_address, &ip->dst_address)) + return 0; + + /* Parse options. */ + while (1) + { + int i, j; + + if (unformat (input, "tos %U", unformat_vlib_number, &i)) + ip->tos = i; + + else if (unformat (input, "ttl %U", unformat_vlib_number, &i)) + ip->ttl = i; + + else if (unformat (input, "fragment id %U offset %U", + unformat_vlib_number, &i, + unformat_vlib_number, &j)) + { + ip->fragment_id = clib_host_to_net_u16 (i); + ip->flags_and_fragment_offset |= + clib_host_to_net_u16 ((i / 8) & 0x1fff); + } + + /* Flags. 
*/ + else if (unformat (input, "mf") || unformat (input, "MF")) + ip->flags_and_fragment_offset |= clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + + else if (unformat (input, "df") || unformat (input, "DF")) + ip->flags_and_fragment_offset |= clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + + else if (unformat (input, "ce") || unformat (input, "CE")) + ip->flags_and_fragment_offset |= clib_host_to_net_u16 (IP4_HEADER_FLAG_CONGESTION); + + /* Can't parse input: try next protocol level. */ + else + break; + } + + /* Fill in checksum. */ + ip->checksum = ip4_header_checksum (ip); + + /* Recurse into next protocol layer. */ + { + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi = ip_get_protocol_info (im, ip->protocol); + + if (pi && pi->unformat_header) + { + if (! unformat_user (input, pi->unformat_header, result)) + return 0; + + /* Result may have moved. */ + ip = (void *) *result + old_length; + } + } + + /* Fill in IP length. */ + ip->length = clib_host_to_net_u16 (vec_len (*result) - old_length); + + return 1; +} diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c new file mode 100644 index 00000000000..fd304163a6b --- /dev/null +++ b/vnet/vnet/ip/ip4_forward.c @@ -0,0 +1,3564 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_forward.c: IP v4 forwarding + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ +#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */ +#include <vnet/ppp/ppp.h> +#include <vnet/srp/srp.h> /* for srp_hw_interface_class */ +#include <vnet/api_errno.h> /* for API error numbers */ + +/* This is really, really simple but stupid fib. 
*/ +u32 +ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, + ip4_address_t * dst, + u32 disable_default_route) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index); + uword * p, * hash, key; + i32 i, i_min, dst_address, ai; + + i_min = disable_default_route ? 1 : 0; + dst_address = clib_mem_unaligned (&dst->data_u32, u32); + for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--) + { + hash = fib->adj_index_by_dst_address[i]; + if (! hash) + continue; + + key = dst_address & im->fib_masks[i]; + if ((p = hash_get (hash, key)) != 0) + { + ai = p[0]; + goto done; + } + } + + /* Nothing matches in table. */ + ai = lm->miss_adj_index; + + done: + return ai; +} + +static ip4_fib_t * +create_fib_with_table_id (ip4_main_t * im, u32 table_id) +{ + ip4_fib_t * fib; + hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs)); + vec_add2 (im->fibs, fib, 1); + fib->table_id = table_id; + fib->index = fib - im->fibs; + fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; + fib->fwd_classify_table_index = ~0; + fib->rev_classify_table_index = ~0; + ip4_mtrie_init (&fib->mtrie); + return fib; +} + +ip4_fib_t * +find_ip4_fib_by_table_index_or_id (ip4_main_t * im, + u32 table_index_or_id, u32 flags) +{ + uword * p, fib_index; + + fib_index = table_index_or_id; + if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX)) + { + p = hash_get (im->fib_index_by_table_id, table_index_or_id); + if (! p) + return create_fib_with_table_id (im, table_index_or_id); + fib_index = p[0]; + } + return vec_elt_at_index (im->fibs, fib_index); +} + +static void +ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm, + ip4_fib_t * fib, + u32 address_length) +{ + hash_t * h; + uword max_index; + + ASSERT (lm->fib_result_n_bytes >= sizeof (uword)); + lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword); + + fib->adj_index_by_dst_address[address_length] = + hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword)); + + hash_set_flags (fib->adj_index_by_dst_address[address_length], + HASH_FLAG_NO_AUTO_SHRINK); + + h = hash_header (fib->adj_index_by_dst_address[address_length]); + max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1; + + /* Initialize new/old hash value vectors. 
*/ + vec_validate_init_empty (fib->new_hash_values, max_index, ~0); + vec_validate_init_empty (fib->old_hash_values, max_index, ~0); +} + +static void serialize_ip4_address (serialize_main_t * m, va_list * va) +{ + ip4_address_t * a = va_arg (*va, ip4_address_t *); + u8 * p = serialize_get (m, sizeof (a->as_u8)); + memcpy (p, a->as_u8, sizeof (a->as_u8)); +} + +static void unserialize_ip4_address (serialize_main_t * m, va_list * va) +{ + ip4_address_t * a = va_arg (*va, ip4_address_t *); + u8 * p = unserialize_get (m, sizeof (a->as_u8)); + memcpy (a->as_u8, p, sizeof (a->as_u8)); +} + +static void serialize_ip4_address_and_length (serialize_main_t * m, va_list * va) +{ + ip4_address_t * a = va_arg (*va, ip4_address_t *); + u32 l = va_arg (*va, u32); + u32 n_bytes = (l / 8) + ((l % 8) != 0); + u8 * p = serialize_get (m, 1 + n_bytes); + ASSERT (l <= 32); + p[0] = l; + memcpy (p + 1, a->as_u8, n_bytes); +} + +static void unserialize_ip4_address_and_length (serialize_main_t * m, va_list * va) +{ + ip4_address_t * a = va_arg (*va, ip4_address_t *); + u32 * al = va_arg (*va, u32 *); + u8 * p = unserialize_get (m, 1); + u32 l, n_bytes; + + al[0] = l = p[0]; + ASSERT (l <= 32); + n_bytes = (l / 8) + ((l % 8) != 0); + + if (n_bytes) + { + p = unserialize_get (m, n_bytes); + memcpy (a->as_u8, p, n_bytes); + } +} + +static void serialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va) +{ + ip4_add_del_route_args_t * a = va_arg (*va, ip4_add_del_route_args_t *); + + serialize_likely_small_unsigned_integer (m, a->table_index_or_table_id); + serialize_likely_small_unsigned_integer (m, a->flags); + serialize (m, serialize_ip4_address_and_length, &a->dst_address, a->dst_address_length); + serialize_likely_small_unsigned_integer (m, a->adj_index); + serialize_likely_small_unsigned_integer (m, a->n_add_adj); + if (a->n_add_adj > 0) + serialize (m, serialize_vec_ip_adjacency, a->add_adj, a->n_add_adj); +} + +/* Serialized adjacencies for arp/rewrite do not send graph next_index + since graph hookup is not guaranteed to be the same for both sides + of serialize/unserialize. */ +static void +unserialize_fixup_ip4_rewrite_adjacencies (vlib_main_t * vm, + ip_adjacency_t * adj, + u32 n_adj) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 i, ni, sw_if_index, is_arp; + vnet_hw_interface_t * hw; + + for (i = 0; i < n_adj; i++) + { + switch (adj[i].lookup_next_index) + { + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_ARP: + is_arp = adj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP; + sw_if_index = adj[i].rewrite_header.sw_if_index; + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + ni = is_arp ? 
ip4_arp_node.index : ip4_rewrite_node.index; + adj[i].rewrite_header.node_index = ni; + adj[i].rewrite_header.next_index = vlib_node_add_next (vm, ni, hw->output_node_index); + if (is_arp) + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_ARP, + sw_if_index, + ni, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, + &adj[i].rewrite_header, + sizeof (adj->rewrite_data)); + break; + + default: + break; + } + } +} + +static void unserialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va) +{ + ip4_main_t * i4m = &ip4_main; + ip4_add_del_route_args_t a; + + a.table_index_or_table_id = unserialize_likely_small_unsigned_integer (m); + a.flags = unserialize_likely_small_unsigned_integer (m); + unserialize (m, unserialize_ip4_address_and_length, &a.dst_address, &a.dst_address_length); + a.adj_index = unserialize_likely_small_unsigned_integer (m); + a.n_add_adj = unserialize_likely_small_unsigned_integer (m); + a.add_adj = 0; + if (a.n_add_adj > 0) + { + vec_resize (a.add_adj, a.n_add_adj); + unserialize (m, unserialize_vec_ip_adjacency, a.add_adj, a.n_add_adj); + unserialize_fixup_ip4_rewrite_adjacencies (vlib_get_main(), + a.add_adj, a.n_add_adj); + } + + /* Prevent re-re-distribution. */ + a.flags |= IP4_ROUTE_FLAG_NO_REDISTRIBUTE; + + ip4_add_del_route (i4m, &a); + + vec_free (a.add_adj); +} + +MC_SERIALIZE_MSG (ip4_add_del_route_msg, static) = { + .name = "vnet_ip4_add_del_route", + .serialize = serialize_ip4_add_del_route_msg, + .unserialize = unserialize_ip4_add_del_route_msg, +}; + +static void +ip4_fib_set_adj_index (ip4_main_t * im, + ip4_fib_t * fib, + u32 flags, + u32 dst_address_u32, + u32 dst_address_length, + u32 adj_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + uword * hash; + + if (vec_bytes(fib->old_hash_values)) + memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values)); + if (vec_bytes(fib->new_hash_values)) + memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values)); + fib->new_hash_values[0] = adj_index; + + /* Make sure adj index is valid. */ + if (CLIB_DEBUG > 0) + (void) ip_get_adjacency (lm, adj_index); + + hash = fib->adj_index_by_dst_address[dst_address_length]; + + hash = _hash_set3 (hash, dst_address_u32, + fib->new_hash_values, + fib->old_hash_values); + + fib->adj_index_by_dst_address[dst_address_length] = hash; + + if (vec_len (im->add_del_route_callbacks) > 0) + { + ip4_add_del_route_callback_t * cb; + ip4_address_t d; + uword * p; + + d.data_u32 = dst_address_u32; + vec_foreach (cb, im->add_del_route_callbacks) + if ((flags & cb->required_flags) == cb->required_flags) + cb->function (im, cb->function_opaque, + fib, flags, + &d, dst_address_length, + fib->old_hash_values, + fib->new_hash_values); + + p = hash_get (hash, dst_address_u32); + memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values)); + } +} + +void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a) +{ + vlib_main_t * vm = vlib_get_main(); + ip_lookup_main_t * lm = &im->lookup_main; + ip4_fib_t * fib; + u32 dst_address, dst_address_length, adj_index, old_adj_index; + uword * hash, is_del; + ip4_add_del_route_callback_t * cb; + + if (vm->mc_main && ! (a->flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE)) + { + u32 multiple_messages_per_vlib_buffer = (a->flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP); + mc_serialize2 (vm->mc_main, multiple_messages_per_vlib_buffer, + &ip4_add_del_route_msg, a); + return; + } + + /* Either create new adjacency or use given one depending on arguments. 
*/ + if (a->n_add_adj > 0) + { + ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); + ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); + } + else + adj_index = a->adj_index; + + dst_address = a->dst_address.data_u32; + dst_address_length = a->dst_address_length; + fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags); + + ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); + dst_address &= im->fib_masks[dst_address_length]; + + if (! fib->adj_index_by_dst_address[dst_address_length]) + ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length); + + hash = fib->adj_index_by_dst_address[dst_address_length]; + + is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0; + + if (is_del) + { + fib->old_hash_values[0] = ~0; + hash = _hash_unset (hash, dst_address, fib->old_hash_values); + fib->adj_index_by_dst_address[dst_address_length] = hash; + + if (vec_len (im->add_del_route_callbacks) > 0 + && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */ + { + fib->new_hash_values[0] = ~0; + vec_foreach (cb, im->add_del_route_callbacks) + if ((a->flags & cb->required_flags) == cb->required_flags) + cb->function (im, cb->function_opaque, + fib, a->flags, + &a->dst_address, dst_address_length, + fib->old_hash_values, + fib->new_hash_values); + } + } + else + ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length, + adj_index); + + old_adj_index = fib->old_hash_values[0]; + + ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length, + is_del ? old_adj_index : adj_index, + is_del); + + /* Delete old adjacency index if present and changed. */ + if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY) + && old_adj_index != ~0 + && old_adj_index != adj_index) + ip_del_adjacency (lm, old_adj_index); +} + +static void serialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va) +{ + u32 flags = va_arg (*va, u32); + ip4_address_t * dst_address = va_arg (*va, ip4_address_t *); + u32 dst_address_length = va_arg (*va, u32); + ip4_address_t * next_hop_address = va_arg (*va, ip4_address_t *); + u32 next_hop_sw_if_index = va_arg (*va, u32); + u32 next_hop_weight = va_arg (*va, u32); + + serialize_likely_small_unsigned_integer (m, flags); + serialize (m, serialize_ip4_address_and_length, dst_address, dst_address_length); + serialize (m, serialize_ip4_address, next_hop_address); + serialize_likely_small_unsigned_integer (m, next_hop_sw_if_index); + serialize_likely_small_unsigned_integer (m, next_hop_weight); +} + +static void unserialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va) +{ + ip4_main_t * im = &ip4_main; + u32 flags, dst_address_length, next_hop_sw_if_index, next_hop_weight; + ip4_address_t dst_address, next_hop_address; + + flags = unserialize_likely_small_unsigned_integer (m); + unserialize (m, unserialize_ip4_address_and_length, &dst_address, &dst_address_length); + unserialize (m, unserialize_ip4_address, &next_hop_address); + next_hop_sw_if_index = unserialize_likely_small_unsigned_integer (m); + next_hop_weight = unserialize_likely_small_unsigned_integer (m); + + ip4_add_del_route_next_hop + (im, + flags | IP4_ROUTE_FLAG_NO_REDISTRIBUTE, + &dst_address, + dst_address_length, + &next_hop_address, + next_hop_sw_if_index, + next_hop_weight, (u32)~0, + (u32)~0 /* explicit FIB index */); +} + +MC_SERIALIZE_MSG (ip4_add_del_route_next_hop_msg, static) = { + .name = "vnet_ip4_add_del_route_next_hop", + .serialize = serialize_ip4_add_del_route_next_hop_msg, 
+ .unserialize = unserialize_ip4_add_del_route_next_hop_msg, +}; + +void +ip4_add_del_route_next_hop (ip4_main_t * im, + u32 flags, + ip4_address_t * dst_address, + u32 dst_address_length, + ip4_address_t * next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_weight, u32 adj_index, + u32 explicit_fib_index) +{ + vnet_main_t * vnm = vnet_get_main(); + vlib_main_t * vm = vlib_get_main(); + ip_lookup_main_t * lm = &im->lookup_main; + u32 fib_index; + ip4_fib_t * fib; + u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index; + u32 dst_adj_index, nh_adj_index; + uword * dst_hash, * dst_result; + uword * nh_hash, * nh_result; + ip_adjacency_t * dst_adj; + ip_multipath_adjacency_t * old_mp, * new_mp; + int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0; + int is_interface_next_hop; + clib_error_t * error = 0; + + if (vm->mc_main && ! (flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE)) + { + u32 multiple_messages_per_vlib_buffer = (flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP); + mc_serialize2 (vm->mc_main, + multiple_messages_per_vlib_buffer, + &ip4_add_del_route_next_hop_msg, + flags, + dst_address, dst_address_length, + next_hop, next_hop_sw_if_index, next_hop_weight); + return; + } + + if (explicit_fib_index == (u32)~0) + fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index); + else + fib_index = explicit_fib_index; + + fib = vec_elt_at_index (im->fibs, fib_index); + + /* Lookup next hop to be added or deleted. */ + is_interface_next_hop = next_hop->data_u32 == 0; + if (adj_index == (u32)~0) + { + if (is_interface_next_hop) + { + nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index); + if (nh_result) + nh_adj_index = *nh_result; + else + { + ip_adjacency_t * adj; + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &nh_adj_index); + ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0); + ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0); + hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index); + } + } + else + { + nh_hash = fib->adj_index_by_dst_address[32]; + nh_result = hash_get (nh_hash, next_hop->data_u32); + + /* Next hop must be known. */ + if (! nh_result) + { + vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB; + error = clib_error_return (0, "next-hop %U/32 not in FIB", + format_ip4_address, next_hop); + goto done; + } + nh_adj_index = *nh_result; + } + } + else + { + nh_adj_index = adj_index; + } + ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); + dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length]; + + dst_hash = fib->adj_index_by_dst_address[dst_address_length]; + dst_result = hash_get (dst_hash, dst_address_u32); + if (dst_result) + { + dst_adj_index = dst_result[0]; + dst_adj = ip_get_adjacency (lm, dst_adj_index); + } + else + { + /* For deletes destination must be known. */ + if (is_del) + { + vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; + error = clib_error_return (0, "unknown destination %U/%d", + format_ip4_address, dst_address, + dst_address_length); + goto done; + } + + dst_adj_index = ~0; + dst_adj = 0; + } + + /* Ignore adds of X/32 with next hop of X. */ + if (! 
is_del + && dst_address_length == 32 + && dst_address->data_u32 == next_hop->data_u32 + && adj_index != (u32)~0) + { + vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP; + error = clib_error_return (0, "prefix matches next hop %U/%d", + format_ip4_address, dst_address, + dst_address_length); + goto done; + } + + old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0; + + if (! ip_multipath_adjacency_add_del_next_hop + (lm, is_del, + old_mp_adj_index, + nh_adj_index, + next_hop_weight, + &new_mp_adj_index)) + { + vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP; + error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path", + format_ip4_address, next_hop); + goto done; + } + + old_mp = new_mp = 0; + if (old_mp_adj_index != ~0) + old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); + if (new_mp_adj_index != ~0) + new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index); + + if (old_mp != new_mp) + { + ip4_add_del_route_args_t a; + a.table_index_or_table_id = fib_index; + a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD) + | IP4_ROUTE_FLAG_FIB_INDEX + | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY + | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP))); + a.dst_address = dst_address[0]; + a.dst_address_length = dst_address_length; + a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index; + a.add_adj = 0; + a.n_add_adj = 0; + + ip4_add_del_route (im, &a); + } + + done: + if (error) + clib_error_report (error); +} + +void * +ip4_get_route (ip4_main_t * im, + u32 table_index_or_table_id, + u32 flags, + u8 * address, + u32 address_length) +{ + ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); + u32 dst_address = * (u32 *) address; + uword * hash, * p; + + ASSERT (address_length < ARRAY_LEN (im->fib_masks)); + dst_address &= im->fib_masks[address_length]; + + hash = fib->adj_index_by_dst_address[address_length]; + p = hash_get (hash, dst_address); + return (void *) p; +} + +void +ip4_foreach_matching_route (ip4_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip4_address_t * address, + u32 address_length, + ip4_address_t ** results, + u8 ** result_lengths) +{ + ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); + u32 dst_address = address->data_u32; + u32 this_length = address_length; + + if (*results) + _vec_len (*results) = 0; + if (*result_lengths) + _vec_len (*result_lengths) = 0; + + while (this_length <= 32 && vec_len (results) == 0) + { + uword k, v; + hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({ + if (0 == ((k ^ dst_address) & im->fib_masks[address_length])) + { + ip4_address_t a; + a.data_u32 = k; + vec_add1 (*results, a); + vec_add1 (*result_lengths, this_length); + } + })); + + this_length++; + } +} + +void ip4_maybe_remap_adjacencies (ip4_main_t * im, + u32 table_index_or_table_id, + u32 flags) +{ + ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); + ip_lookup_main_t * lm = &im->lookup_main; + u32 i, l; + ip4_address_t a; + ip4_add_del_route_callback_t * cb; + static ip4_address_t * to_delete; + + if (lm->n_adjacency_remaps == 0) + return; + + for (l = 0; l <= 32; l++) + { + hash_pair_t * p; + uword * hash = fib->adj_index_by_dst_address[l]; + + if (hash_elts (hash) == 0) + continue; + + if (to_delete) + _vec_len (to_delete) = 0; + + hash_foreach_pair (p, hash, ({ + u32 adj_index = p->value[0]; + u32 m = vec_elt 
(lm->adjacency_remap_table, adj_index); + + if (m) + { + /* Record destination address from hash key. */ + a.data_u32 = p->key; + + /* New adjacency points to nothing: so delete prefix. */ + if (m == ~0) + vec_add1 (to_delete, a); + else + { + /* Remap to new adjacency. */ + memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values)); + + /* Set new adjacency value. */ + fib->new_hash_values[0] = p->value[0] = m - 1; + + vec_foreach (cb, im->add_del_route_callbacks) + if ((flags & cb->required_flags) == cb->required_flags) + cb->function (im, cb->function_opaque, + fib, flags | IP4_ROUTE_FLAG_ADD, + &a, l, + fib->old_hash_values, + fib->new_hash_values); + } + } + })); + + fib->new_hash_values[0] = ~0; + for (i = 0; i < vec_len (to_delete); i++) + { + hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values); + vec_foreach (cb, im->add_del_route_callbacks) + if ((flags & cb->required_flags) == cb->required_flags) + cb->function (im, cb->function_opaque, + fib, flags | IP4_ROUTE_FLAG_DEL, + &a, l, + fib->old_hash_values, + fib->new_hash_values); + } + } + + /* Also remap adjacencies in mtrie. */ + ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie); + + /* Reset mapping table. */ + vec_zero (lm->adjacency_remap_table); + + /* All remaps have been performed. */ + lm->n_adjacency_remaps = 0; +} + +void ip4_delete_matching_routes (ip4_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip4_address_t * address, + u32 address_length) +{ + static ip4_address_t * matching_addresses; + static u8 * matching_address_lengths; + u32 l, i; + ip4_add_del_route_args_t a; + + a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags; + a.table_index_or_table_id = table_index_or_table_id; + a.adj_index = ~0; + a.add_adj = 0; + a.n_add_adj = 0; + + for (l = address_length + 1; l <= 32; l++) + { + ip4_foreach_matching_route (im, table_index_or_table_id, flags, + address, + l, + &matching_addresses, + &matching_address_lengths); + for (i = 0; i < vec_len (matching_addresses); i++) + { + a.dst_address = matching_addresses[i]; + a.dst_address_length = matching_address_lengths[i]; + ip4_add_del_route (im, &a); + } + } + + ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags); +} + +always_inline uword +ip4_lookup_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int lookup_for_responses_to_locally_received_packets) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + __attribute__((unused)) tcp_header_t * tcp0, * tcp1; + ip_lookup_next_t next0, next1; + ip_adjacency_t * adj0, * adj1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0; + __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1; + u32 flow_hash_config0, flow_hash_config1; + u32 hash_c0, hash_c1; + u32 wrong_next; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? + fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; + fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? + fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; + + + if (! lookup_for_responses_to_locally_received_packets) + { + mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0); + } + + tcp0 = (void *) (ip0 + 1); + tcp1 = (void *) (ip1 + 1); + + is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP + || ip0->protocol == IP_PROTOCOL_UDP); + is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP + || ip1->protocol == IP_PROTOCOL_UDP); + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1); + } + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2); + } + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3); + } + + if (lookup_for_responses_to_locally_received_packets) + { + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; + } + else + { + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); + + adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + } + + ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, + &ip0->dst_address, + /* no_default_route */ 0)); + ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, + &ip1->dst_address, + /* no_default_route */ 0)); + adj0 = ip_get_adjacency (lm, adj_index0); + adj1 = ip_get_adjacency (lm, adj_index1); + + next0 = adj0->lookup_next_index; + next1 = adj1->lookup_next_index; + + /* Use flow hash to compute multipath adjacency. 
*/ + hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; + hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0; + if (PREDICT_FALSE (adj0->n_adj > 1)) + { + flow_hash_config0 = + vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + hash_c0 = vnet_buffer (p0)->ip.flow_hash = + ip4_compute_flow_hash (ip0, flow_hash_config0); + } + if (PREDICT_FALSE(adj1->n_adj > 1)) + { + flow_hash_config1 = + vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config; + hash_c1 = vnet_buffer (p1)->ip.flow_hash = + ip4_compute_flow_hash (ip1, flow_hash_config1); + } + + ASSERT (adj0->n_adj > 0); + ASSERT (adj1->n_adj > 0); + ASSERT (is_pow2 (adj0->n_adj)); + ASSERT (is_pow2 (adj1->n_adj)); + adj_index0 += (hash_c0 & (adj0->n_adj - 1)); + adj_index1 += (hash_c1 & (adj1->n_adj - 1)); + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + + vlib_increment_combined_counter + (cm, cpu_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + + sizeof(ethernet_header_t)); + vlib_increment_combined_counter + (cm, cpu_index, adj_index1, 1, + vlib_buffer_length_in_chain (vm, p1) + + sizeof(ethernet_header_t)); + + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + wrong_next = (next0 != next) + 2*(next1 != next); + if (PREDICT_FALSE (wrong_next != 0)) + { + switch (wrong_next) + { + case 1: + /* A B A */ + to_next[-2] = pi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, pi1); + break; + + case 3: + /* A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + vlib_set_next_frame_buffer (vm, node, next1, pi1); + if (next0 == next1) + { + /* A B B */ + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next1; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + __attribute__((unused)) tcp_header_t * tcp0; + ip_lookup_next_t next0; + ip_adjacency_t * adj0; + ip4_fib_mtrie_t * mtrie0; + ip4_fib_mtrie_leaf_t leaf0; + __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0; + u32 flow_hash_config0, hash_c0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? + fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; + + if (! lookup_for_responses_to_locally_received_packets) + { + mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + + leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0); + } + + tcp0 = (void *) (ip0 + 1); + + is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP + || ip0->protocol == IP_PROTOCOL_UDP); + + if (! lookup_for_responses_to_locally_received_packets) + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1); + + if (! lookup_for_responses_to_locally_received_packets) + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2); + + if (! 
lookup_for_responses_to_locally_received_packets) + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3); + + if (lookup_for_responses_to_locally_received_packets) + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; + else + { + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + } + + ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, + &ip0->dst_address, + /* no_default_route */ 0)); + + adj0 = ip_get_adjacency (lm, adj_index0); + + next0 = adj0->lookup_next_index; + + /* Use flow hash to compute multipath adjacency. */ + hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; + if (PREDICT_FALSE(adj0->n_adj > 1)) + { + flow_hash_config0 = + vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + + hash_c0 = vnet_buffer (p0)->ip.flow_hash = + ip4_compute_flow_hash (ip0, flow_hash_config0); + } + + ASSERT (adj0->n_adj > 0); + ASSERT (is_pow2 (adj0->n_adj)); + adj_index0 += (hash_c0 & (adj0->n_adj - 1)); + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + + vlib_increment_combined_counter + (cm, cpu_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + + sizeof(ethernet_header_t)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +ip4_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0); + +} + +void ip4_adjacency_set_interface_route (vnet_main_t * vnm, + ip_adjacency_t * adj, + u32 sw_if_index, + u32 if_address_index) +{ + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + ip_lookup_next_t n; + vnet_l3_packet_type_t packet_type; + u32 node_index; + + if (hw->hw_class_index == ethernet_hw_interface_class.index + || hw->hw_class_index == srp_hw_interface_class.index) + { + /* + * We have a bit of a problem in this case. ip4-arp uses + * the rewrite_header.next_index to hand pkts to the + * indicated interface output node. We can end up in + * ip4_rewrite_local, too, which also pays attention to + * rewrite_header.next_index. Net result: a hack in + * ip4_rewrite_local... 
+ */ + n = IP_LOOKUP_NEXT_ARP; + node_index = ip4_arp_node.index; + adj->if_address_index = if_address_index; + packet_type = VNET_L3_PACKET_TYPE_ARP; + } + else + { + n = IP_LOOKUP_NEXT_REWRITE; + node_index = ip4_rewrite_node.index; + packet_type = VNET_L3_PACKET_TYPE_IP4; + } + + adj->lookup_next_index = n; + vnet_rewrite_for_sw_interface + (vnm, + packet_type, + sw_if_index, + node_index, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); +} + +static void +ip4_add_interface_routes (u32 sw_if_index, + ip4_main_t * im, u32 fib_index, + ip_interface_address_t * a) +{ + vnet_main_t * vnm = vnet_get_main(); + ip_lookup_main_t * lm = &im->lookup_main; + ip_adjacency_t * adj; + ip4_address_t * address = ip_interface_address_get_address (lm, a); + ip4_add_del_route_args_t x; + vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index); + u32 classify_table_index; + + /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */ + x.table_index_or_table_id = fib_index; + x.flags = (IP4_ROUTE_FLAG_ADD + | IP4_ROUTE_FLAG_FIB_INDEX + | IP4_ROUTE_FLAG_NO_REDISTRIBUTE); + x.dst_address = address[0]; + x.dst_address_length = a->address_length; + x.n_add_adj = 0; + x.add_adj = 0; + + a->neighbor_probe_adj_index = ~0; + if (a->address_length < 32) + { + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &x.adj_index); + ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool); + ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); + ip4_add_del_route (im, &x); + a->neighbor_probe_adj_index = x.adj_index; + } + + /* Add e.g. 1.1.1.1/32 as local to this host. */ + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &x.adj_index); + + classify_table_index = ~0; + if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) + classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index]; + if (classify_table_index != (u32) ~0) + { + adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; + adj->classify_table_index = classify_table_index; + } + else + adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + + adj->if_address_index = a - lm->if_address_pool; + adj->rewrite_header.sw_if_index = sw_if_index; + adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX]; + /* + * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local + * fail an RPF-ish check, but still go thru the rewrite code... + */ + adj->rewrite_header.data_bytes = 0; + + ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); + x.dst_address_length = 32; + ip4_add_del_route (im, &x); +} + +static void +ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length) +{ + ip4_add_del_route_args_t x; + + /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ + x.table_index_or_table_id = fib_index; + x.flags = (IP4_ROUTE_FLAG_DEL + | IP4_ROUTE_FLAG_FIB_INDEX + | IP4_ROUTE_FLAG_NO_REDISTRIBUTE); + x.dst_address = address[0]; + x.dst_address_length = address_length; + x.adj_index = ~0; + x.n_add_adj = 0; + x.add_adj = 0; + + if (address_length < 32) + ip4_add_del_route (im, &x); + + x.dst_address_length = 32; + ip4_add_del_route (im, &x); + + ip4_delete_matching_routes (im, + fib_index, + IP4_ROUTE_FLAG_FIB_INDEX, + address, + address_length); +} + +typedef struct { + u32 sw_if_index; + ip4_address_t address; + u32 length; +} ip4_interface_address_t; + +static void serialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va) +{ + ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) { + serialize_integer (m, a[i].sw_if_index, sizeof (a[i].sw_if_index)); + serialize (m, serialize_ip4_address, &a[i].address); + serialize_integer (m, a[i].length, sizeof (a[i].length)); + } +} + +static void unserialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va) +{ + ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) { + unserialize_integer (m, &a[i].sw_if_index, sizeof (a[i].sw_if_index)); + unserialize (m, unserialize_ip4_address, &a[i].address); + unserialize_integer (m, &a[i].length, sizeof (a[i].length)); + } +} + +static void serialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va) +{ + ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *); + int is_del = va_arg (*va, int); + serialize (m, serialize_vec_ip4_set_interface_address, a, 1); + serialize_integer (m, is_del, sizeof (is_del)); +} + +static clib_error_t * +ip4_add_del_interface_address_internal (vlib_main_t * vm, + u32 sw_if_index, + ip4_address_t * new_address, + u32 new_length, + u32 redistribute, + u32 insert_routes, + u32 is_del); + +static void unserialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va) +{ + mc_main_t * mcm = va_arg (*va, mc_main_t *); + vlib_main_t * vm = mcm->vlib_main; + ip4_interface_address_t a; + clib_error_t * error; + int is_del; + + unserialize (m, unserialize_vec_ip4_set_interface_address, &a, 1); + unserialize_integer (m, &is_del, sizeof (is_del)); + error = ip4_add_del_interface_address_internal + (vm, a.sw_if_index, &a.address, a.length, + /* redistribute */ 0, + /* insert_routes */ 1, + is_del); + if (error) + clib_error_report (error); +} + +MC_SERIALIZE_MSG (ip4_set_interface_address_msg, static) = { + .name = "vnet_ip4_set_interface_address", + .serialize = serialize_ip4_set_interface_address_msg, + .unserialize = unserialize_ip4_set_interface_address_msg, +}; + +static clib_error_t * +ip4_add_del_interface_address_internal (vlib_main_t * vm, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 redistribute, + u32 insert_routes, + u32 is_del) +{ + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + clib_error_t * error = 0; + u32 if_address_index, elts_before; + ip4_address_fib_t ip4_af, * addr_fib = 0; + + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + ip4_addr_fib_init (&ip4_af, address, + vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); + vec_add1 (addr_fib, ip4_af); + + /* When adding an address check that it does not conflict with an existing address. */ + if (! 
is_del) + { + ip_interface_address_t * ia; + foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia); + + if (ip4_destination_matches_route (im, address, x, ia->address_length) + || ip4_destination_matches_route (im, x, address, address_length)) + return clib_error_create ("failed to add %U which conflicts with %U for interface %U", + format_ip4_address_and_length, address, address_length, + format_ip4_address_and_length, x, ia->address_length, + format_vnet_sw_if_index_name, vnm, sw_if_index); + })); + } + + if (vm->mc_main && redistribute) + { + ip4_interface_address_t a; + a.sw_if_index = sw_if_index; + a.address = address[0]; + a.length = address_length; + mc_serialize (vm->mc_main, &ip4_set_interface_address_msg, + &a, (int)is_del); + goto done; + } + + elts_before = pool_elts (lm->if_address_pool); + + error = ip_interface_address_add_del + (lm, + sw_if_index, + addr_fib, + address_length, + is_del, + &if_address_index); + if (error) + goto done; + + if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes) + { + if (is_del) + ip4_del_interface_routes (im, ip4_af.fib_index, address, + address_length); + + else + ip4_add_interface_routes (sw_if_index, + im, ip4_af.fib_index, + pool_elt_at_index + (lm->if_address_pool, if_address_index)); + } + + /* If pool did not grow/shrink: add duplicate address. */ + if (elts_before != pool_elts (lm->if_address_pool)) + { + ip4_add_del_interface_address_callback_t * cb; + vec_foreach (cb, im->add_del_interface_address_callbacks) + cb->function (im, cb->function_opaque, sw_if_index, + address, address_length, + if_address_index, + is_del); + } + + done: + vec_free (addr_fib); + return error; +} + +clib_error_t * +ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, + ip4_address_t * address, u32 address_length, + u32 is_del) +{ + return ip4_add_del_interface_address_internal + (vm, sw_if_index, address, address_length, + /* redistribute */ 1, + /* insert_routes */ 1, + is_del); +} + +static void serialize_ip4_fib (serialize_main_t * m, va_list * va) +{ + ip4_fib_t * f = va_arg (*va, ip4_fib_t *); + u32 l, dst, adj_index; + + serialize_integer (m, f->table_id, sizeof (f->table_id)); + for (l = 0; l < ARRAY_LEN (f->adj_index_by_dst_address); l++) + { + u32 n_elts = hash_elts (f->adj_index_by_dst_address[l]); + + serialize_integer (m, n_elts, sizeof (n_elts)); + hash_foreach (dst, adj_index, f->adj_index_by_dst_address[l], ({ + ip4_address_t tmp; + tmp.as_u32 = dst; + serialize (m, serialize_ip4_address, &tmp); + serialize_integer (m, adj_index, sizeof (adj_index)); + })); + } +} + +static void unserialize_ip4_fib (serialize_main_t * m, va_list * va) +{ + ip4_add_del_route_args_t a; + u32 i; + + a.flags = (IP4_ROUTE_FLAG_ADD + | IP4_ROUTE_FLAG_NO_REDISTRIBUTE + | IP4_ROUTE_FLAG_TABLE_ID); + a.n_add_adj = 0; + a.add_adj = 0; + + unserialize_integer (m, &a.table_index_or_table_id, + sizeof (a.table_index_or_table_id)); + + for (i = 0; i < STRUCT_ARRAY_LEN (ip4_fib_t, adj_index_by_dst_address); i++) + { + u32 n_elts; + unserialize_integer (m, &n_elts, sizeof (u32)); + a.dst_address_length = i; + while (n_elts > 0) + { + unserialize (m, unserialize_ip4_address, &a.dst_address); + unserialize_integer (m, &a.adj_index, sizeof (a.adj_index)); + ip4_add_del_route (&ip4_main, &a); + n_elts--; + } + } +} + +void serialize_vnet_ip4_main (serialize_main_t * m, va_list * va) +{ + vnet_main_t * vnm = va_arg (*va, 
vnet_main_t *); + vnet_interface_main_t * vim = &vnm->interface_main; + vnet_sw_interface_t * si; + ip4_main_t * i4m = &ip4_main; + ip4_interface_address_t * as = 0, * a; + + /* Download adjacency tables & multipath stuff. */ + serialize (m, serialize_ip_lookup_main, &i4m->lookup_main); + + /* FIBs. */ + { + ip4_fib_t * f; + u32 n_fibs = vec_len (i4m->fibs); + serialize_integer (m, n_fibs, sizeof (n_fibs)); + vec_foreach (f, i4m->fibs) + serialize (m, serialize_ip4_fib, f); + } + + /* FIB interface config. */ + vec_serialize (m, i4m->fib_index_by_sw_if_index, serialize_vec_32); + + /* Interface ip4 addresses. */ + pool_foreach (si, vim->sw_interfaces, ({ + u32 sw_if_index = si->sw_if_index; + ip_interface_address_t * ia; + foreach_ip_interface_address (&i4m->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + ip4_address_t * x = ip_interface_address_get_address (&i4m->lookup_main, ia); + vec_add2 (as, a, 1); + a->address = x[0]; + a->length = ia->address_length; + a->sw_if_index = sw_if_index; + })); + })); + vec_serialize (m, as, serialize_vec_ip4_set_interface_address); + vec_free (as); +} + +void unserialize_vnet_ip4_main (serialize_main_t * m, va_list * va) +{ + vlib_main_t * vm = va_arg (*va, vlib_main_t *); + ip4_main_t * i4m = &ip4_main; + ip4_interface_address_t * as = 0, * a; + + unserialize (m, unserialize_ip_lookup_main, &i4m->lookup_main); + + { + ip_adjacency_t * adj, * adj_heap; + u32 n_adj; + adj_heap = i4m->lookup_main.adjacency_heap; + heap_foreach (adj, n_adj, adj_heap, ({ + unserialize_fixup_ip4_rewrite_adjacencies (vm, adj, n_adj); + ip_call_add_del_adjacency_callbacks (&i4m->lookup_main, adj - adj_heap, /* is_del */ 0); + })); + } + + /* FIBs */ + { + u32 i, n_fibs; + unserialize_integer (m, &n_fibs, sizeof (n_fibs)); + for (i = 0; i < n_fibs; i++) + unserialize (m, unserialize_ip4_fib); + } + + vec_unserialize (m, &i4m->fib_index_by_sw_if_index, unserialize_vec_32); + + vec_unserialize (m, &as, unserialize_vec_ip4_set_interface_address); + vec_foreach (a, as) { + ip4_add_del_interface_address_internal + (vm, a->sw_if_index, &a->address, a->length, + /* redistribute */ 0, + /* insert_routes */ 0, + /* is_del */ 0); + } + vec_free (as); +} + +static clib_error_t * +ip4_sw_interface_admin_up_down (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + ip4_main_t * im = &ip4_main; + ip_interface_address_t * ia; + ip4_address_t * a; + u32 is_admin_up, fib_index; + + /* Fill in lookup tables with default table (0). 
*/ + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + + vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0); + + is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); + + foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + a = ip_interface_address_get_address (&im->lookup_main, ia); + if (is_admin_up) + ip4_add_interface_routes (sw_if_index, + im, fib_index, + ia); + else + ip4_del_interface_routes (im, fib_index, + a, ia->address_length); + })); + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down); + +static clib_error_t * +ip4_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + vlib_main_t * vm = vnm->vlib_main; + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 ci, cast; + + for (cast = 0; cast < VNET_N_CAST; cast++) + { + ip_config_main_t * cm = &lm->rx_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; + + if (! vcm->node_index_by_feature_index) + { + if (cast == VNET_UNICAST) + { + static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", }; + static char * feature_nodes[] = { + [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl", + [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx", + [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any", + [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4", + [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4", + [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup", + }; + + vnet_config_init (vm, vcm, + start_nodes, ARRAY_LEN (start_nodes), + feature_nodes, ARRAY_LEN (feature_nodes)); + } + else + { + static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", }; + static char * feature_nodes[] = { + [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4", + [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast", + }; + + vnet_config_init (vm, vcm, + start_nodes, ARRAY_LEN (start_nodes), + feature_nodes, ARRAY_LEN (feature_nodes)); + } + } + + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + if (is_add) + ci = vnet_config_add_feature (vm, vcm, + ci, + IP4_RX_FEATURE_LOOKUP, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, + ci, + IP4_RX_FEATURE_LOOKUP, + /* config data */ 0, + /* # bytes of config data */ 0); + + cm->config_index_by_sw_if_index[sw_if_index] = ci; + } + + return /* no error */ 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del); + +VLIB_REGISTER_NODE (ip4_lookup_node) = { + .function = ip4_lookup, + .name = "ip4-lookup", + .vector_size = sizeof (u32), + + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip4-miss", + [IP_LOOKUP_NEXT_DROP] = "ip4-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", + [IP_LOOKUP_NEXT_ARP] = "ip4-arp", + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", + [IP_LOOKUP_NEXT_MAP] = "ip4-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd", + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", + }, +}; + +/* Global IP4 main. 
*/ +ip4_main_t ip4_main; + +clib_error_t * +ip4_lookup_init (vlib_main_t * vm) +{ + ip4_main_t * im = &ip4_main; + uword i; + + for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) + { + u32 m; + + if (i < 32) + m = pow2_mask (i) << (32 - i); + else + m = ~0; + im->fib_masks[i] = clib_host_to_net_u32 (m); + } + + /* Create FIB with index 0 and table id of 0. */ + find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID); + + ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); + + { + pg_node_t * pn; + pn = pg_get_node (ip4_lookup_node.index); + pn->unformat_edit = unformat_pg_ip4_header; + } + + { + ethernet_arp_header_t h; + + memset (&h, 0, sizeof (h)); + + /* Set target ethernet address to all zeros. */ + memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet)); + +#define _16(f,v) h.f = clib_host_to_net_u16 (v); +#define _8(f,v) h.f = v; + _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet); + _16 (l3_type, ETHERNET_TYPE_IP4); + _8 (n_l2_address_bytes, 6); + _8 (n_l3_address_bytes, 4); + _16 (opcode, ETHERNET_ARP_OPCODE_request); +#undef _16 +#undef _8 + + vlib_packet_template_init (vm, + &im->ip4_arp_request_packet_template, + /* data */ &h, + sizeof (h), + /* alloc chunk size */ 8, + "ip4 arp"); + } + + return 0; +} + +VLIB_INIT_FUNCTION (ip4_lookup_init); + +typedef struct { + /* Adjacency taken. */ + u32 adj_index; + u32 flow_hash; + u32 fib_index; + + /* Packet data, possibly *after* rewrite. */ + u8 packet_data[64 - 1*sizeof(u32)]; +} ip4_forward_next_trace_t; + +static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *); + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + ip_adjacency_t * adj; + uword indent = format_get_indent (s); + + adj = ip_get_adjacency (&im->lookup_main, t->adj_index); + s = format (s, "fib: %d adjacency: %U flow hash: 0x%08x", + t->fib_index, format_ip_adjacency, + vnm, &im->lookup_main, t->adj_index, t->flow_hash); + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_REWRITE: + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + vnm, &im->lookup_main, t->adj_index, + t->packet_data, sizeof (t->packet_data)); + break; + + default: + break; + } + + return s; +} + +/* Common trace function for all ip4-forward next nodes. */ +void +ip4_forward_next_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vlib_rx_or_tx_t which_adj_index) +{ + u32 * from, n_left; + ip4_main_t * im = &ip4_main; + + n_left = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + while (n_left >= 4) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ip4_forward_next_trace_t * t0, * t1; + + /* Prefetch next iteration. 
*/ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; + t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; + t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + memcpy (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index]; + t1->flow_hash = vnet_buffer (b1)->ip.flow_hash; + t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(b1)->sw_if_index[VLIB_RX]); + memcpy (t1->packet_data, + vlib_buffer_get_current (b1), + sizeof (t1->packet_data)); + } + from += 2; + n_left -= 2; + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + ip4_forward_next_trace_t * t0; + + bi0 = from[0]; + + b0 = vlib_get_buffer (vm, bi0); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; + t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; + t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + memcpy (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); + } + from += 1; + n_left -= 1; + } +} + +static uword +ip4_drop_or_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + ip4_error_t error_code) +{ + u32 * buffers = vlib_frame_vector_args (frame); + uword n_packets = frame->n_vectors; + + vlib_error_drop_buffers (vm, node, + buffers, + /* stride */ 1, + n_packets, + /* next */ 0, + ip4_input_node.index, + error_code); + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + return n_packets; +} + +static uword +ip4_drop (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); } + +static uword +ip4_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); } + +static uword +ip4_miss (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); } + +VLIB_REGISTER_NODE (ip4_drop_node,static) = { + .function = ip4_drop, + .name = "ip4-drop", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ip4_punt_node,static) = { + .function = ip4_punt, + .name = "ip4-punt", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-punt", + }, +}; + +VLIB_REGISTER_NODE (ip4_miss_node,static) = { + .function = ip4_miss, + .name = "ip4-miss", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +/* Compute TCP/UDP/ICMP4 checksum in software. 
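/*
 * [Editorial aside -- illustrative sketch, not part of the original commit.]
 * The checksum function below seeds the ones'-complement sum with the IPv4
 * pseudo-header (source address, destination address, protocol and L4
 * length) and then folds in the payload, walking chained buffers as needed.
 * A rough stand-alone sketch of the core RFC 1071 accumulate-and-fold step
 * (toy_csum16 is a hypothetical helper, not a vnet API):
 */
#include <stddef.h>
#include <stdint.h>

static uint16_t
toy_csum16 (const void * data, size_t len, uint32_t sum)
{
  const uint8_t * p = data;

  /* Accumulate 16-bit big-endian words into a 32-bit sum. */
  while (len > 1)
    {
      sum += (uint32_t) ((p[0] << 8) | p[1]);
      p += 2;
      len -= 2;
    }
  if (len)
    sum += (uint32_t) (p[0] << 8);

  /* Fold the carries back in and return the ones'-complement. */
  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t) ~sum;
}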
*/ +u16 +ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip4_header_t * ip0) +{ + ip_csum_t sum0; + u32 ip_header_length, payload_length_host_byte_order; + u32 n_this_buffer, n_bytes_left; + u16 sum16; + void * data_this_buffer; + + /* Initialize checksum with ip header. */ + ip_header_length = ip4_header_bytes (ip0); + payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length; + sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16)); + + if (BITS (uword) == 32) + { + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32)); + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64)); + + n_bytes_left = n_this_buffer = payload_length_host_byte_order; + data_this_buffer = (void *) ip0 + ip_header_length; + if (n_this_buffer + ip_header_length > p0->current_length) + n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0; + while (1) + { + sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT); + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = p0->current_length; + } + + sum16 = ~ ip_csum_fold (sum0); + + return sum16; +} + +static u32 +ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) +{ + ip4_header_t * ip0 = vlib_buffer_get_current (p0); + udp_header_t * udp0; + u16 sum16; + + ASSERT (ip0->protocol == IP_PROTOCOL_TCP + || ip0->protocol == IP_PROTOCOL_UDP); + + udp0 = (void *) (ip0 + 1); + if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0) + { + p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED + | IP_BUFFER_L4_CHECKSUM_CORRECT); + return p0->flags; + } + + sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0); + + p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED + | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT)); + + return p0->flags; +} + +static uword +ip4_local (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip_local_next_t next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + udp_header_t * udp0, * udp1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip_adjacency_t * adj0, * adj1; + u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0; + u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1; + i32 len_diff0, len_diff1; + u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; + u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; + u8 enqueue_code; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = 
vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(p0)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(p1)->sw_if_index[VLIB_RX]); + + mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0); + + proto0 = ip0->protocol; + proto1 = ip1->protocol; + is_udp0 = proto0 == IP_PROTOCOL_UDP; + is_udp1 = proto1 == IP_PROTOCOL_UDP; + is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; + is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP; + + flags0 = p0->flags; + flags1 = p1->flags; + + good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + + udp0 = ip4_next_header (ip0); + udp1 = ip4_next_header (ip1); + + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; + good_tcp_udp1 |= is_udp1 && udp1->checksum == 0; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); + + /* Verify UDP length. */ + ip_len0 = clib_net_to_host_u16 (ip0->length); + ip_len1 = clib_net_to_host_u16 (ip1->length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + udp_len1 = clib_net_to_host_u16 (udp1->length); + + len_diff0 = ip_len0 - udp_len0; + len_diff1 = ip_len1 - udp_len1; + + len_diff0 = is_udp0 ? len_diff0 : 0; + len_diff1 = is_udp1 ? len_diff1 : 0; + + if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1 + & good_tcp_udp0 & good_tcp_udp1))) + { + if (is_tcp_udp0) + { + if (is_tcp_udp0 + && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) + flags0 = ip4_tcp_udp_validate_checksum (vm, p0); + good_tcp_udp0 = + (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; + } + if (is_tcp_udp1) + { + if (is_tcp_udp1 + && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) + flags1 = ip4_tcp_udp_validate_checksum (vm, p1); + good_tcp_udp1 = + (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_tcp_udp1 |= is_udp1 && udp1->checksum == 0; + } + } + + good_tcp_udp0 &= len_diff0 >= 0; + good_tcp_udp1 &= len_diff1 >= 0; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2); + + error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; + + error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0; + error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1; + + ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); + error0 = (is_tcp_udp0 && ! good_tcp_udp0 + ? IP4_ERROR_TCP_CHECKSUM + is_udp0 + : error0); + error1 = (is_tcp_udp1 && ! good_tcp_udp1 + ? 
IP4_ERROR_TCP_CHECKSUM + is_udp1 + : error1); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); + + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + + vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + + ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, + &ip0->src_address, + /* no_default_route */ 1)); + ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, + &ip1->src_address, + /* no_default_route */ 1)); + + adj0 = ip_get_adjacency (lm, adj_index0); + adj1 = ip_get_adjacency (lm, adj_index1); + + /* + * Must have a route to source otherwise we drop the packet. + * ip4 broadcasts are accepted, e.g. to make dhcp client work + */ + error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL + && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE + && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP + && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL + && ip0->dst_address.as_u32 != 0xFFFFFFFF + ? IP4_ERROR_SRC_LOOKUP_MISS + : error0); + error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL + && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE + && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP + && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL + && ip1->dst_address.as_u32 != 0xFFFFFFFF + ? IP4_ERROR_SRC_LOOKUP_MISS + : error1); + + next0 = lm->local_next_by_ip_protocol[proto0]; + next1 = lm->local_next_by_ip_protocol[proto1]; + + next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1; + + p0->error = error0 ? error_node->errors[error0] : 0; + p1->error = error1 ? 
error_node->errors[error1] : 0; + + enqueue_code = (next0 != next_index) + 2*(next1 != next_index); + + if (PREDICT_FALSE (enqueue_code != 0)) + { + switch (enqueue_code) + { + case 1: + /* A B A */ + to_next[-2] = pi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, pi1); + break; + + case 3: + /* A B B or A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + vlib_set_next_frame_buffer (vm, node, next1, pi1); + if (next0 == next1) + { + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + next_index = next1; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + } + break; + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + udp_header_t * udp0; + ip4_fib_mtrie_t * mtrie0; + ip4_fib_mtrie_leaf_t leaf0; + ip_adjacency_t * adj0; + u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0; + i32 len_diff0; + u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(p0)->sw_if_index[VLIB_RX]); + + mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + + leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); + + proto0 = ip0->protocol; + is_udp0 = proto0 == IP_PROTOCOL_UDP; + is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; + + flags0 = p0->flags; + + good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + + udp0 = ip4_next_header (ip0); + + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); + + /* Verify UDP length. */ + ip_len0 = clib_net_to_host_u16 (ip0->length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + + len_diff0 = ip_len0 - udp_len0; + + len_diff0 = is_udp0 ? len_diff0 : 0; + + if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0))) + { + if (is_tcp_udp0) + { + if (is_tcp_udp0 + && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) + flags0 = ip4_tcp_udp_validate_checksum (vm, p0); + good_tcp_udp0 = + (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; + } + } + + good_tcp_udp0 &= len_diff0 >= 0; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); + + error0 = IP4_ERROR_UNKNOWN_PROTOCOL; + + error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0; + + ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); + error0 = (is_tcp_udp0 && ! good_tcp_udp0 + ? IP4_ERROR_TCP_CHECKSUM + is_udp0 + : error0); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + + ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, + &ip0->src_address, + /* no_default_route */ 1)); + + adj0 = ip_get_adjacency (lm, adj_index0); + + /* Must have a route to source otherwise we drop the packet. 
*/ + error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL + && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE + && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP + && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL + && ip0->dst_address.as_u32 != 0xFFFFFFFF + ? IP4_ERROR_SRC_LOOKUP_MISS + : error0); + + next0 = lm->local_next_by_ip_protocol[proto0]; + + next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + + p0->error = error0? error_node->errors[error0] : 0; + + if (PREDICT_FALSE (next0 != next_index)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + next_index = next0; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip4_local_node,static) = { + .function = ip4_local, + .name = "ip4-local", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = IP_LOCAL_N_NEXT, + .next_nodes = { + [IP_LOCAL_NEXT_DROP] = "error-drop", + [IP_LOCAL_NEXT_PUNT] = "error-punt", + // [IP_LOCAL_NEXT_TCP_LOOKUP] = "ip4-tcp-lookup", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input", + }, +}; + +void ip4_register_protocol (u32 protocol, u32 node_index) +{ + vlib_main_t * vm = vlib_get_main(); + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + + ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol)); + lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index); +} + +static clib_error_t * +show_ip_local_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + int i; + + vlib_cli_output (vm, "Protocols handled by ip4_local"); + for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++) + { + if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT) + vlib_cli_output (vm, "%d", i); + } + return 0; +} + + + +VLIB_CLI_COMMAND (show_ip_local, static) = { + .path = "show ip local", + .function = show_ip_local_command_fn, + .short_help = "Show ip local protocol table", +}; + +static uword +ip4_arp (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 * from, * to_next_drop; + uword n_left_from, n_left_to_next_drop, next_index; + static f64 time_last_seed_change = -1e100; + static u32 hash_seeds[3]; + static uword hash_bitmap[256 / BITS (uword)]; + f64 time_now; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + time_now = vlib_time_now (vm); + if (time_now - time_last_seed_change > 1e-3) + { + uword i; + u32 * r = clib_random_buffer_get_data (&vm->random_buffer, + sizeof (hash_seeds)); + for (i = 0; i < ARRAY_LEN (hash_seeds); i++) + hash_seeds[i] = r[i]; + + /* Mark all hash keys as been no-seen before. 
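/*
 * [Editorial aside -- illustrative sketch, not part of the original commit.]
 * The hash seeds and bitmap above implement a small rate limiter: each
 * (destination address, tx interface) pair is hashed into a 256-bit
 * "recently requested" filter, and the filter is re-seeded and cleared when
 * more than a millisecond has passed, so at most one ARP request per
 * destination is generated per interval.  A minimal stand-alone model of
 * the filter (toy_* names are hypothetical):
 */
#include <stdint.h>

static uint64_t toy_arp_filter[256 / 64];

/* Returns nonzero if this hash value was already seen since the last
 * clear; always marks it as seen.  A false positive only suppresses an
 * extra ARP request until the next re-seed. */
static int
toy_arp_seen_and_mark (uint32_t hash)
{
  uint32_t bit = hash & 255;
  uint64_t mask = 1ULL << (bit & 63);
  int seen = (toy_arp_filter[bit >> 6] & mask) != 0;

  toy_arp_filter[bit >> 6] |= mask;
  return seen;
}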
*/ + for (i = 0; i < ARRAY_LEN (hash_bitmap); i++) + hash_bitmap[i] = 0; + + time_last_seed_change = time_now; + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + if (next_index == IP4_ARP_NEXT_DROP) + next_index = IP4_ARP_N_NEXT; /* point to first interface */ + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP, + to_next_drop, n_left_to_next_drop); + + while (n_left_from > 0 && n_left_to_next_drop > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + ethernet_header_t * eh0; + u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; + uword bm0; + ip_adjacency_t * adj0; + + pi0 = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj0 = ip_get_adjacency (lm, adj_index0); + ip0 = vlib_buffer_get_current (p0); + + /* + * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP + * rewrite to this packet, we need to skip it here. + * Note, to distinguish from src IP addr *.8.6.*, we + * check for a bcast eth dest instead of IPv4 version. + */ + eh0 = (ethernet_header_t*)ip0; + if ((ip0->ip_version_and_header_length & 0xF0) != 0x40) + { + u32 vlan_num = 0; + u16 * etype = &eh0->type; + while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q + || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad + { + vlan_num += 1; + etype += 2; //vlan tag also 16 bits, same as etype + } + if (*etype == clib_host_to_net_u16 (0x0806)) //arp + { + vlib_buffer_advance ( + p0, sizeof(ethernet_header_t) + (4*vlan_num)); + ip0 = vlib_buffer_get_current (p0); + } + } + + a0 = hash_seeds[0]; + b0 = hash_seeds[1]; + c0 = hash_seeds[2]; + + sw_if_index0 = adj0->rewrite_header.sw_if_index; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + + a0 ^= ip0->dst_address.data_u32; + b0 ^= sw_if_index0; + + hash_v3_finalize32 (a0, b0, c0); + + c0 &= BITS (hash_bitmap) - 1; + c0 = c0 / BITS (uword); + m0 = (uword) 1 << (c0 % BITS (uword)); + + bm0 = hash_bitmap[c0]; + drop0 = (bm0 & m0) != 0; + + /* Mark it as seen. */ + hash_bitmap[c0] = bm0 | m0; + + from += 1; + n_left_from -= 1; + to_next_drop[0] = pi0; + to_next_drop += 1; + n_left_to_next_drop -= 1; + + p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT]; + + if (drop0) + continue; + + /* + * Can happen if the control-plane is programming tables + * with traffic flowing; at least that's today's lame excuse. + */ + if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) + { + p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ]; + } + else + /* Send ARP request. */ + { + u32 bi0 = 0; + vlib_buffer_t * b0; + ethernet_arp_header_t * h0; + vnet_hw_interface_t * hw_if0; + + h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0); + + /* Add rewrite/encap string for ARP packet. */ + vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t)); + + hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + /* Src ethernet address in ARP header. */ + memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, + sizeof (h0->ip4_over_ethernet[0].ethernet)); + + ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0); + + /* Copy in destination address we are requesting. 
*/ + h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32; + + vlib_buffer_copy_trace_flag (vm, p0, bi0); + b0 = vlib_get_buffer (vm, bi0); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0; + + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + + vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0); + } + } + + vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop); + } + + return frame->n_vectors; +} + +static char * ip4_arp_error_strings[] = { + [IP4_ARP_ERROR_DROP] = "address overflow drops", + [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent", + [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies", + [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed", + [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed", +}; + +VLIB_REGISTER_NODE (ip4_arp_node) = { + .function = ip4_arp, + .name = "ip4-arp", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; + +#define foreach_notrace_ip4_arp_error \ +_(DROP) \ +_(REQUEST_SENT) \ +_(REPLICATE_DROP) \ +_(REPLICATE_FAIL) + +clib_error_t * arp_notrace_init (vlib_main_t * vm) +{ + vlib_node_runtime_t *rt = + vlib_node_get_runtime (vm, ip4_arp_node.index); + + /* don't trace ARP request packets */ +#define _(a) \ + vnet_pcap_drop_trace_filter_add_del \ + (rt->errors[IP4_ARP_ERROR_##a], \ + 1 /* is_add */); + foreach_notrace_ip4_arp_error; +#undef _ + return 0; +} + +VLIB_INIT_FUNCTION(arp_notrace_init); + + +/* Send an ARP request to see if given destination is reachable on given interface. */ +clib_error_t * +ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) +{ + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + ethernet_arp_header_t * h; + ip4_address_t * src; + ip_interface_address_t * ia; + ip_adjacency_t * adj; + vnet_hw_interface_t * hi; + vnet_sw_interface_t * si; + vlib_buffer_t * b; + u32 bi = 0; + + si = vnet_get_sw_interface (vnm, sw_if_index); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + return clib_error_return (0, "%U: interface %U down", + format_ip4_address, dst, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + } + + src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia); + if (! src) + { + vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE; + return clib_error_return + (0, "no matching interface address for destination %U (interface %U)", + format_ip4_address, dst, + format_vnet_sw_if_index_name, vnm, sw_if_index); + } + + adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index); + + h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi); + + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + + memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet)); + + h->ip4_over_ethernet[0].ip4 = src[0]; + h->ip4_over_ethernet[1].ip4 = dst[0]; + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). 
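/*
 * [Editorial aside -- illustrative sketch, not part of the original commit.]
 * The rewrite applied below is a precomputed byte string (here an Ethernet
 * header) copied in front of the payload; advancing the buffer by a negative
 * amount then exposes those bytes as the new packet start.  A stand-alone
 * sketch of that prepend, assuming enough headroom (toy_buffer_t is
 * hypothetical, not the vlib buffer layout):
 */
#include <stdint.h>
#include <string.h>

typedef struct
{
  uint8_t data[2048];
  int current_data;     /* offset of the packet start within data[] */
  int current_length;   /* bytes of packet currently present */
} toy_buffer_t;

static void
toy_prepend_rewrite (toy_buffer_t * b, const void * rewrite, int rewrite_bytes)
{
  /* Move the packet start backwards into the headroom... */
  b->current_data -= rewrite_bytes;
  b->current_length += rewrite_bytes;
  /* ...and copy the precomputed encapsulation in front of the payload. */
  memcpy (b->data + b->current_data, rewrite, rewrite_bytes);
}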
*/ + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 * to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } + + return /* no error */ 0; +} + +typedef enum { + IP4_REWRITE_NEXT_DROP, + IP4_REWRITE_NEXT_ARP, +} ip4_rewrite_next_t; + +always_inline uword +ip4_rewrite_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int rewrite_for_locally_received_packets) +{ + ip_lookup_main_t * lm = &ip4_main.lookup_main; + u32 * from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, * to_next, next_index; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX; + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + ip_adjacency_t * adj0, * adj1; + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + u32 pi0, rw_len0, next0, error0, checksum0, adj_index0; + u32 pi1, rw_len1, next1, error1, checksum1, adj_index1; + u32 next0_override, next1_override; + + if (rewrite_for_locally_received_packets) + next0_override = next1_override = 0; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0 && adj_index1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + error0 = error1 = IP4_ERROR_NONE; + + /* Decrement TTL & update checksum. + Works either endian, so no need for byte swap. */ + if (! rewrite_for_locally_received_packets) + { + i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl; + + /* Input node should have reject packets with ttl 0. */ + ASSERT (ip0->ttl > 0); + ASSERT (ip1->ttl > 0); + + checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); + checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100); + + checksum0 += checksum0 >= 0xffff; + checksum1 += checksum1 >= 0xffff; + + ip0->checksum = checksum0; + ip1->checksum = checksum1; + + ttl0 -= 1; + ttl1 -= 1; + + ip0->ttl = ttl0; + ip1->ttl = ttl1; + + error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0; + error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1; + + /* Verify checksum. */ + ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + ASSERT (ip1->checksum == ip4_header_checksum (ip1)); + } + + /* Rewrite packet header and updates lengths. */ + adj0 = ip_get_adjacency (lm, adj_index0); + adj1 = ip_get_adjacency (lm, adj_index1); + + if (rewrite_for_locally_received_packets) + { + /* + * If someone sends e.g. 
an icmp4 w/ src = dst = interface addr, + * we end up here with a local adjacency in hand + * The local adj rewrite data is 0xfefe on purpose. + * Bad engineer, no donut for you. + */ + if (PREDICT_FALSE(adj0->lookup_next_index + == IP_LOOKUP_NEXT_LOCAL)) + error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; + if (PREDICT_FALSE(adj0->lookup_next_index + == IP_LOOKUP_NEXT_ARP)) + next0_override = IP4_REWRITE_NEXT_ARP; + if (PREDICT_FALSE(adj1->lookup_next_index + == IP_LOOKUP_NEXT_LOCAL)) + error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; + if (PREDICT_FALSE(adj1->lookup_next_index + == IP_LOOKUP_NEXT_ARP)) + next1_override = IP4_REWRITE_NEXT_ARP; + } + + /* Worth pipelining. No guarantee that adj0,1 are hot... */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + rw_len1 = adj1[0].rewrite_header.data_bytes; + next0 = (error0 == IP4_ERROR_NONE) + ? adj0[0].rewrite_header.next_index : 0; + + if (rewrite_for_locally_received_packets) + next0 = next0 && next0_override ? next0_override : next0; + + next1 = (error1 == IP4_ERROR_NONE) + ? adj1[0].rewrite_header.next_index : 0; + + if (rewrite_for_locally_received_packets) + next1 = next1 && next1_override ? next1_override : next1; + + /* + * We've already accounted for an ethernet_header_t elsewhere + */ + if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) + vlib_increment_combined_counter + (&lm->adjacency_counters, + cpu_index, adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + + if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t))) + vlib_increment_combined_counter + (&lm->adjacency_counters, + cpu_index, adj_index1, + /* packet increment */ 0, + /* byte increment */ rw_len1-sizeof(ethernet_header_t)); + + /* Check MTU of outgoing interface. */ + error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes + ? IP4_ERROR_MTU_EXCEEDED + : error0); + error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes + ? IP4_ERROR_MTU_EXCEEDED + : error1); + + p0->current_data -= rw_len0; + p1->current_data -= rw_len1; + + p0->current_length += rw_len0; + p1->current_length += rw_len1; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index; + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, + sizeof (ethernet_header_t)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + ip4_header_t * ip0; + u32 pi0, rw_len0, adj_index0, next0, error0, checksum0; + u32 next0_override; + + if (rewrite_for_locally_received_packets) + next0_override = 0; + + pi0 = to_next[0] = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0); + + adj0 = ip_get_adjacency (lm, adj_index0); + + ip0 = vlib_buffer_get_current (p0); + + error0 = IP4_ERROR_NONE; + next0 = 0; /* drop on error */ + + /* Decrement TTL & update checksum. */ + if (! 
rewrite_for_locally_received_packets) + { + i32 ttl0 = ip0->ttl; + + checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); + + checksum0 += checksum0 >= 0xffff; + + ip0->checksum = checksum0; + + ASSERT (ip0->ttl > 0); + + ttl0 -= 1; + + ip0->ttl = ttl0; + + ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + + error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0; + } + + if (rewrite_for_locally_received_packets) + { + /* + * If someone sends e.g. an icmp4 w/ src = dst = interface addr, + * we end up here with a local adjacency in hand + * The local adj rewrite data is 0xfefe on purpose. + * Bad engineer, no donut for you. + */ + if (PREDICT_FALSE(adj0->lookup_next_index + == IP_LOOKUP_NEXT_LOCAL)) + error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; + /* + * We have to override the next_index in ARP adjacencies, + * because they're set up for ip4-arp, not this node... + */ + if (PREDICT_FALSE(adj0->lookup_next_index + == IP_LOOKUP_NEXT_ARP)) + next0_override = IP4_REWRITE_NEXT_ARP; + } + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], ip0, + sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + + if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) + vlib_increment_combined_counter + (&lm->adjacency_counters, + cpu_index, adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + + /* Check MTU of outgoing interface. */ + error0 = (vlib_buffer_length_in_chain (vm, p0) + > adj0[0].rewrite_header.max_l3_packet_bytes + ? IP4_ERROR_MTU_EXCEEDED + : error0); + + p0->error = error_node->errors[error0]; + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + + next0 = (error0 == IP4_ERROR_NONE) + ? adj0[0].rewrite_header.next_index : 0; + + if (rewrite_for_locally_received_packets) + next0 = next0 && next0_override ? next0_override : next0; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Need to do trace after rewrites to pick up new packet data. 
*/ + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, adj_rx_tx); + + return frame->n_vectors; +} + +static uword +ip4_rewrite_transit (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 0); +} + +static uword +ip4_rewrite_local (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 1); +} + +VLIB_REGISTER_NODE (ip4_rewrite_node) = { + .function = ip4_rewrite_transit, + .name = "ip4-rewrite-transit", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = 2, + .next_nodes = { + [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_ARP] = "ip4-arp", + }, +}; + +VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = { + .function = ip4_rewrite_local, + .name = "ip4-rewrite-local", + .vector_size = sizeof (u32), + + .sibling_of = "ip4-rewrite-transit", + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = 2, + .next_nodes = { + [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_ARP] = "ip4-arp", + }, +}; + +static clib_error_t * +add_del_interface_table (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index, table_id; + + sw_if_index = ~0; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "%d", &table_id)) + ; + else + { + error = clib_error_return (0, "expected table id `%U'", + format_unformat_error, input); + goto done; + } + + { + ip4_main_t * im = &ip4_main; + ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID); + + if (fib) + { + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + im->fib_index_by_sw_if_index[sw_if_index] = fib->index; + } + } + + done: + return error; +} + +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = { + .path = "set interface ip table", + .function = add_del_interface_table, + .short_help = "Add/delete FIB table id for interface", +}; + + +static uword +ip4_lookup_multicast (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + u32 pi0, pi1, adj_index0, adj_index1, wrong_next; + ip_lookup_next_t next0, next1; + ip4_header_t * ip0, * ip1; + ip_adjacency_t * adj0, * adj1; + u32 fib_index0, fib_index1; + u32 flow_hash_config0, flow_hash_config1; + + /* Prefetch next iteration. 
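/*
 * [Editorial aside -- illustrative sketch, not part of the original commit.]
 * Further down in this loop the looked-up adjacency index is offset by the
 * flow hash masked with (n_adj - 1); because n_adj is kept a power of two,
 * the mask is a cheap modulo and every packet of a given flow picks the same
 * equal-cost path.  Stand-alone sketch (toy_ecmp_select is hypothetical):
 */
#include <stdint.h>

static uint32_t
toy_ecmp_select (uint32_t base_adj_index, uint32_t n_adj, uint32_t flow_hash)
{
  /* n_adj must be a power of two for the mask to be a uniform bucket. */
  return base_adj_index + (flow_hash & (n_adj - 1));
}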
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? + fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; + fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? + fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; + + adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, + &ip0->dst_address, p0); + adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, + &ip1->dst_address, p1); + + adj0 = ip_get_adjacency (lm, adj_index0); + adj1 = ip_get_adjacency (lm, adj_index1); + + next0 = adj0->lookup_next_index; + next1 = adj1->lookup_next_index; + + flow_hash_config0 = + vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + + flow_hash_config1 = + vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config; + + vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash + (ip0, flow_hash_config0); + + vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash + (ip1, flow_hash_config1); + + ASSERT (adj0->n_adj > 0); + ASSERT (adj1->n_adj > 0); + ASSERT (is_pow2 (adj0->n_adj)); + ASSERT (is_pow2 (adj1->n_adj)); + adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1)); + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + + if (1) /* $$$$$$ HACK FIXME */ + vlib_increment_combined_counter + (cm, cpu_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0)); + if (1) /* $$$$$$ HACK FIXME */ + vlib_increment_combined_counter + (cm, cpu_index, adj_index1, 1, + vlib_buffer_length_in_chain (vm, p1)); + + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + wrong_next = (next0 != next) + 2*(next1 != next); + if (PREDICT_FALSE (wrong_next != 0)) + { + switch (wrong_next) + { + case 1: + /* A B A */ + to_next[-2] = pi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, pi1); + break; + + case 3: + /* A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + vlib_set_next_frame_buffer (vm, node, next1, pi1); + if (next0 == next1) + { + /* A B B */ + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next1; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + u32 pi0, adj_index0; + ip_lookup_next_t next0; + ip_adjacency_t * adj0; + u32 fib_index0; + u32 flow_hash_config0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p0)->sw_if_index[VLIB_RX]); + 
fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? + fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; + + adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, + &ip0->dst_address, p0); + + adj0 = ip_get_adjacency (lm, adj_index0); + + next0 = adj0->lookup_next_index; + + flow_hash_config0 = + vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + + vnet_buffer (p0)->ip.flow_hash = + ip4_compute_flow_hash (ip0, flow_hash_config0); + + ASSERT (adj0->n_adj > 0); + ASSERT (is_pow2 (adj0->n_adj)); + adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + + if (1) /* $$$$$$ HACK FIXME */ + vlib_increment_combined_counter + (cm, cpu_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = { + .function = ip4_lookup_multicast, + .name = "ip4-lookup-multicast", + .vector_size = sizeof (u32), + + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip4-miss", + [IP_LOOKUP_NEXT_DROP] = "ip4-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", + [IP_LOOKUP_NEXT_ARP] = "ip4-arp", + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", + [IP_LOOKUP_NEXT_MAP] = "ip4-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd", + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", + }, +}; + +VLIB_REGISTER_NODE (ip4_multicast_node,static) = { + .function = ip4_drop, + .name = "ip4-multicast", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0) +{ + ip4_main_t * im = &ip4_main; + ip4_fib_mtrie_t * mtrie0; + ip4_fib_mtrie_leaf_t leaf0; + u32 adj_index0; + + mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + + leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3); + + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie0->default_leaf : leaf0); + + adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + + return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, + a, + /* no_default_route */ 0); +} + +static clib_error_t * +test_lookup_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 table_id = 0; + f64 count = 1; + u32 n; + int i; + ip4_address_t ip4_base_address; + u64 errors = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "table %d", &table_id)) + ; + else if (unformat (input, "count %f", &count)) + ; + + else if (unformat (input, "%U", + unformat_ip4_address, &ip4_base_address)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + n = count; + + for (i = 0; i < n; i++) + { + if (!ip4_lookup_validate (&ip4_base_address, table_id)) + errors++; + + ip4_base_address.as_u32 = + clib_host_to_net_u32 (1 + + clib_net_to_host_u32 (ip4_base_address.as_u32)); + } + + if (errors) + vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n); + else + vlib_cli_output (vm, "No errors in %d lookups\n", n); + + return 0; +} + +VLIB_CLI_COMMAND (lookup_test_command, static) = { + .path = "test lookup", + .short_help = "test lookup", + .function = test_lookup_command_fn, +}; + +int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config) +{ + ip4_main_t * im4 = &ip4_main; + ip4_fib_t * fib; + uword * p = hash_get (im4->fib_index_by_table_id, table_id); + + if (p == 0) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib = vec_elt_at_index (im4->fibs, p[0]); + + fib->flow_hash_config = flow_hash_config; + return 0; +} + +static clib_error_t * +set_ip_flow_hash_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int matched = 0; + u32 table_id = 0; + u32 flow_hash_config = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "table %d", &table_id)) + matched = 1; +#define _(a,v) \ + else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;} + foreach_flow_hash_bit +#undef _ + else break; + } + + if (matched == 0) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "no such FIB table %d", table_id); + + default: + clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config); + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = { + .path = "set ip flow-hash", + .short_help = + "set ip table flow-hash table <fib-id> src dst sport dport proto reverse", + .function = set_ip_flow_hash_command_fn, +}; + +int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 table_index) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + ip4_main_t * ipm = &ip4_main; + ip_lookup_main_t * lm = &ipm->lookup_main; + vnet_classify_main_t * cm = &vnet_classify_main; + + if (pool_is_free_index (im->sw_interfaces, sw_if_index)) + return VNET_API_ERROR_NO_MATCHING_INTERFACE; + + if (table_index != ~0 && pool_is_free_index (cm->tables, table_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index); + lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index; + + return 0; +} + +static clib_error_t * 
+set_ip_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 table_index = ~0; + int table_index_set = 0; + u32 sw_if_index = ~0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "table-index %d", &table_index)) + table_index_set = 1; + else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + vnet_get_main(), &sw_if_index)) + ; + else + break; + } + + if (table_index_set == 0) + return clib_error_return (0, "classify table-index must be specified"); + + if (sw_if_index == ~0) + return clib_error_return (0, "interface / subif must be specified"); + + rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_MATCHING_INTERFACE: + return clib_error_return (0, "No such interface"); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "No such classifier table"); + } + return 0; +} + +VLIB_CLI_COMMAND (set_ip_classify_command, static) = { + .path = "set ip classify", + .short_help = + "set ip classify intfc <int> table-index <index>", + .function = set_ip_classify_command_fn, +}; + diff --git a/vnet/vnet/ip/ip4_hop_by_hop.c b/vnet/vnet/ip/ip4_hop_by_hop.c new file mode 100644 index 00000000000..ee2bcc0ae75 --- /dev/null +++ b/vnet/vnet/ip/ip4_hop_by_hop.c @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include <vnet/ip/ip.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +typedef struct { + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} ip4_hop_by_hop_main_t; + +ip4_hop_by_hop_main_t ip4_hop_by_hop_main; + +vlib_node_registration_t ip4_hop_by_hop_node; + +typedef struct { + u32 next_index; +} ip4_hop_by_hop_trace_t; + +/* packet trace format function */ +static u8 * format_ip4_hop_by_hop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip4_hop_by_hop_trace_t * t = va_arg (*args, ip4_hop_by_hop_trace_t *); + + s = format (s, "IP4_HOP_BY_HOP: next index %d", + t->next_index); + return s; +} + +vlib_node_registration_t ip4_hop_by_hop_node; + +#define foreach_ip4_hop_by_hop_error \ +_(PROCESSED, "Pkts with ip4 hop-by-hop options") + +typedef enum { +#define _(sym,str) IP4_HOP_BY_HOP_ERROR_##sym, + foreach_ip4_hop_by_hop_error +#undef _ + IP4_HOP_BY_HOP_N_ERROR, +} ip4_hop_by_hop_error_t; + +static char * ip4_hop_by_hop_error_strings[] = { +#define _(sym,string) string, + foreach_ip4_hop_by_hop_error +#undef _ +}; + +static uword +ip4_hop_by_hop_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 n_left_from, * from, * to_next; + ip_lookup_next_t next_index; + u32 processed = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 next0 = IP4_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; + u32 next1 = IP4_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; + u32 sw_if_index0, sw_if_index1; + u8 tmp0[6], tmp1[6]; + ethernet_header_t *en0, *en1; + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* $$$$$ Dual loop: process 2 x packets here $$$$$ */ + ASSERT (b0->current_data == 0); + ASSERT (b1->current_data == 0); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* $$$$$ End of processing 2 x packets $$$$$ */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + ip4_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + ip4_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 adj_index0; + ip_adjacency_t * adj0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = ip_get_adjacency (lm, adj_index0); + + /* $$$$$$$$$$$$ process one (or more) hop-by-hop header(s) here */ + + + /* $$$$$$$$$$$$ */ + + /* Send the packet e.g. 
to ip4_rewrite */ + next0 = adj0->lookup_next_index; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip4_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + } + + processed++; + + /* $$$$$ Done processing 1 packet here $$$$$ */ + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ip4_hop_by_hop_node.index, + IP4_HOP_BY_HOP_ERROR_PROCESSED, processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip4_hop_by_hop_node) = { + .function = ip4_hop_by_hop_node_fn, + .name = "ip4-hop-by-hop", + .vector_size = sizeof (u32), + .format_trace = format_ip4_hop_by_hop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ip4_hop_by_hop_error_strings), + .error_strings = ip4_hop_by_hop_error_strings, + + /* See ip/lookup.h */ + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip4-miss", + [IP_LOOKUP_NEXT_DROP] = "ip4-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", + [IP_LOOKUP_NEXT_ARP] = "ip4-arp", + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", + [IP_LOOKUP_NEXT_MAP] = "ip4-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd", + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", /* probably not */ + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", + }, +}; + +VLIB_REGISTER_NODE (ip4_add_hop_by_hop_node) = { + .function = ip4_hop_by_hop_node_fn, + .name = "ip4-add-hop-by-hop", + .vector_size = sizeof (u32), + .format_trace = format_ip4_hop_by_hop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ip4_hop_by_hop_error_strings), + .error_strings = ip4_hop_by_hop_error_strings, + + /* See ip/lookup.h */ + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip4-miss", + [IP_LOOKUP_NEXT_DROP] = "ip4-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", + [IP_LOOKUP_NEXT_ARP] = "ip4-arp", + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", + [IP_LOOKUP_NEXT_MAP] = "ip4-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd", + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", /* probably not */ + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", + }, +}; + +VLIB_REGISTER_NODE (ip4_pop_hop_by_hop_node) = { + .function = ip4_hop_by_hop_node_fn, + .name = "ip4-pop-hop-by-hop", + .vector_size = sizeof (u32), + .format_trace = format_ip4_hop_by_hop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ip4_hop_by_hop_error_strings), + .error_strings = ip4_hop_by_hop_error_strings, + + /* See ip/lookup.h */ + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip4-miss", + [IP_LOOKUP_NEXT_DROP] = "ip4-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", + [IP_LOOKUP_NEXT_ARP] = "ip4-arp", + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", + [IP_LOOKUP_NEXT_MAP] = "ip4-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t", + [IP_LOOKUP_NEXT_SIXRD] 
= "ip4-sixrd", + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", /* probably not */ + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", + }, +}; + +static clib_error_t * +ip4_hop_by_hop_init (vlib_main_t * vm) +{ + ip4_hop_by_hop_main_t * hm = &ip4_hop_by_hop_main; + + hm->vlib_main = vm; + hm->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (ip4_hop_by_hop_init); diff --git a/vnet/vnet/ip/ip4_input.c b/vnet/vnet/ip/ip4_input.c new file mode 100644 index 00000000000..68edc0fa918 --- /dev/null +++ b/vnet/vnet/ip/ip4_input.c @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_input.c: IP v4 input node + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ppp/ppp.h> +#include <vnet/hdlc/hdlc.h> + +typedef struct { + u8 packet_data[64]; +} ip4_input_trace_t; + +static u8 * format_ip4_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ip4_input_trace_t * t = va_arg (*va, ip4_input_trace_t *); + + s = format (s, "%U", + format_ip4_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + +typedef enum { + IP4_INPUT_NEXT_DROP, + IP4_INPUT_NEXT_PUNT, + IP4_INPUT_NEXT_LOOKUP, + IP4_INPUT_NEXT_LOOKUP_MULTICAST, + IP4_INPUT_NEXT_TTL_EXPIRE, + IP4_INPUT_N_NEXT, +} ip4_input_next_t; + +/* Validate IP v4 packets and pass them either to forwarding code + or drop/punt exception packets.
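   In outline, each packet is checked for: IP version == 4; header length == 5 words (headers carrying options are punted rather than parsed here); a valid header checksum, skipped by the no-checksum variant of the node; fragment offset != 1; TTL > 1 for unicast; and a total length that covers at least a minimal header and does not exceed the bytes present in the buffer chain. A failing check selects an error counter and steers the packet to the drop, punt or ttl-expire next node chosen below.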
*/ +always_inline uword +ip4_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int verify_checksum) +{ + ip4_main_t * im = &ip4_main; + vnet_main_t * vnm = vnet_get_main(); + ip_lookup_main_t * lm = &im->lookup_main; + u32 n_left_from, * from, * to_next; + ip4_input_next_t next_index; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + vlib_simple_counter_main_t * cm; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (ip4_input_trace_t)); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_IP4); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + ip_config_main_t * cm0, * cm1; + u32 sw_if_index0, pi0, ip_len0, cur_len0, next0; + u32 sw_if_index1, pi1, ip_len1, cur_len1, next1; + i32 len_diff0, len_diff1; + u8 error0, error1, cast0, cast1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); + } + + to_next[0] = pi0 = from[0]; + to_next[1] = pi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + + cast0 = ip4_address_is_multicast (&ip0->dst_address) ? VNET_MULTICAST : VNET_UNICAST; + cast1 = ip4_address_is_multicast (&ip1->dst_address) ? VNET_MULTICAST : VNET_UNICAST; + + cm0 = lm->rx_config_mains + cast0; + cm1 = lm->rx_config_mains + cast1; + + vnet_buffer (p0)->ip.current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0); + vnet_buffer (p1)->ip.current_config_index = vec_elt (cm1->config_index_by_sw_if_index, sw_if_index1); + + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; + vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0; + + vnet_get_config_data (&cm0->config_main, + &vnet_buffer (p0)->ip.current_config_index, + &next0, + /* # bytes of config data */ 0); + vnet_get_config_data (&cm1->config_main, + &vnet_buffer (p1)->ip.current_config_index, + &next1, + /* # bytes of config data */ 0); + + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + + error0 = error1 = IP4_ERROR_NONE; + + /* Punt packets with options. */ + error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ? IP4_ERROR_OPTIONS : error0; + error1 = (ip1->ip_version_and_header_length & 0xf) != 5 ? IP4_ERROR_OPTIONS : error1; + + /* Version != 4? Drop it. */ + error0 = (ip0->ip_version_and_header_length >> 4) != 4 ? IP4_ERROR_VERSION : error0; + error1 = (ip1->ip_version_and_header_length >> 4) != 4 ? IP4_ERROR_VERSION : error1; + + /* Verify header checksum. 
*/ + if (verify_checksum) + { + ip_csum_t sum0, sum1; + + ip4_partial_header_checksum_x1 (ip0, sum0); + ip4_partial_header_checksum_x1 (ip1, sum1); + + error0 = 0xffff != ip_csum_fold (sum0) ? IP4_ERROR_BAD_CHECKSUM : error0; + error1 = 0xffff != ip_csum_fold (sum1) ? IP4_ERROR_BAD_CHECKSUM : error1; + } + + /* Drop fragmentation offset 1 packets. */ + error0 = ip4_get_fragment_offset (ip0) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0; + error1 = ip4_get_fragment_offset (ip1) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error1; + + /* TTL <= 1? Drop it. */ + error0 = (ip0->ttl <= 1 && cast0 == VNET_UNICAST) ? IP4_ERROR_TIME_EXPIRED : error0; + error1 = (ip1->ttl <= 1 && cast1 == VNET_UNICAST) ? IP4_ERROR_TIME_EXPIRED : error1; + + /* Verify lengths. */ + ip_len0 = clib_net_to_host_u16 (ip0->length); + ip_len1 = clib_net_to_host_u16 (ip1->length); + + /* IP length must be at least minimal IP header. */ + error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0; + error1 = ip_len1 < sizeof (ip1[0]) ? IP4_ERROR_TOO_SHORT : error1; + + cur_len0 = vlib_buffer_length_in_chain (vm, p0); + cur_len1 = vlib_buffer_length_in_chain (vm, p1); + + len_diff0 = cur_len0 - ip_len0; + len_diff1 = cur_len1 - ip_len1; + + error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0; + error1 = len_diff1 < 0 ? IP4_ERROR_BAD_LENGTH : error1; + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + if (PREDICT_FALSE(error0 != IP4_ERROR_NONE)) + { + next0 = (error0 != IP4_ERROR_OPTIONS + ? (error0 == IP4_ERROR_TIME_EXPIRED + ? IP4_INPUT_NEXT_TTL_EXPIRE + : IP4_INPUT_NEXT_DROP) + : IP4_INPUT_NEXT_PUNT); + } + if (PREDICT_FALSE(error1 != IP4_ERROR_NONE)) + { + next1 = (error1 != IP4_ERROR_OPTIONS + ? (error1 == IP4_ERROR_TIME_EXPIRED + ? IP4_INPUT_NEXT_TTL_EXPIRE + : IP4_INPUT_NEXT_DROP) + : IP4_INPUT_NEXT_PUNT); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + ip_config_main_t * cm0; + u32 sw_if_index0, pi0, ip_len0, cur_len0, next0; + i32 len_diff0; + u8 error0, cast0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip0 = vlib_buffer_get_current (p0); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + cast0 = ip4_address_is_multicast (&ip0->dst_address) ? VNET_MULTICAST : VNET_UNICAST; + cm0 = lm->rx_config_mains + cast0; + vnet_buffer (p0)->ip.current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0); + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; + vnet_get_config_data (&cm0->config_main, + &vnet_buffer (p0)->ip.current_config_index, + &next0, + /* # bytes of config data */ 0); + + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + + error0 = IP4_ERROR_NONE; + + /* Punt packets with options. */ + error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ? IP4_ERROR_OPTIONS : error0; + + /* Version != 4? Drop it. */ + error0 = (ip0->ip_version_and_header_length >> 4) != 4 ? IP4_ERROR_VERSION : error0; + + /* Verify header checksum. */ + if (verify_checksum) + { + ip_csum_t sum0; + + ip4_partial_header_checksum_x1 (ip0, sum0); + error0 = 0xffff != ip_csum_fold (sum0) ? IP4_ERROR_BAD_CHECKSUM : error0; + } + + /* Drop fragmentation offset 1 packets. */ + error0 = ip4_get_fragment_offset (ip0) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0; + + /* TTL <= 1? Drop it. 
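   For unicast packets this raises IP4_ERROR_TIME_EXPIRED, which is steered below to the ip4-icmp-ttl-expire next node (presumably so an ICMP time-exceeded reply can be generated); multicast packets are not TTL-checked here.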
*/ + error0 = (ip0->ttl <= 1 && cast0 == VNET_UNICAST) ? IP4_ERROR_TIME_EXPIRED : error0; + + /* Verify lengths. */ + ip_len0 = clib_net_to_host_u16 (ip0->length); + + /* IP length must be at least minimal IP header. */ + error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0; + + cur_len0 = vlib_buffer_length_in_chain (vm, p0); + len_diff0 = cur_len0 - ip_len0; + error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0; + + p0->error = error_node->errors[error0]; + if (PREDICT_FALSE(error0 != IP4_ERROR_NONE)) + { + next0 = (error0 != IP4_ERROR_OPTIONS + ? (error0 == IP4_ERROR_TIME_EXPIRED + ? IP4_INPUT_NEXT_TTL_EXPIRE + : IP4_INPUT_NEXT_DROP) + : IP4_INPUT_NEXT_PUNT); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +ip4_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_input_inline (vm, node, frame, /* verify_checksum */ 1); +} + +static uword +ip4_input_no_checksum (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0); +} + +static char * ip4_error_strings[] = { +#define _(sym,string) string, + foreach_ip4_error +#undef _ +}; + +VLIB_REGISTER_NODE (ip4_input_node) = { + .function = ip4_input, + .name = "ip4-input", + .vector_size = sizeof (u32), + + .n_errors = IP4_N_ERROR, + .error_strings = ip4_error_strings, + + .n_next_nodes = IP4_INPUT_N_NEXT, + .next_nodes = { + [IP4_INPUT_NEXT_DROP] = "error-drop", + [IP4_INPUT_NEXT_PUNT] = "error-punt", + [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", + [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast", + [IP4_INPUT_NEXT_TTL_EXPIRE] = "ip4-icmp-ttl-expire", + }, + + .format_buffer = format_ip4_header, + .format_trace = format_ip4_input_trace, +}; + +VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = { + .function = ip4_input_no_checksum, + .name = "ip4-input-no-checksum", + .vector_size = sizeof (u32), + + .n_next_nodes = IP4_INPUT_N_NEXT, + .next_nodes = { + [IP4_INPUT_NEXT_DROP] = "error-drop", + [IP4_INPUT_NEXT_PUNT] = "error-punt", + [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", + [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast", + [IP4_INPUT_NEXT_TTL_EXPIRE] = "ip4-icmp-ttl-expire", + }, + + .format_buffer = format_ip4_header, + .format_trace = format_ip4_input_trace, +}; + +static clib_error_t * ip4_init (vlib_main_t * vm) +{ + clib_error_t * error; + + ethernet_register_input_type (vm, ETHERNET_TYPE_IP4, + ip4_input_node.index); + ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4, + ip4_input_node.index); + hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4, + ip4_input_node.index); + + { + pg_node_t * pn; + pn = pg_get_node (ip4_input_node.index); + pn->unformat_edit = unformat_pg_ip4_header; + pn = pg_get_node (ip4_input_no_checksum_node.index); + pn->unformat_edit = unformat_pg_ip4_header; + } + + if ((error = vlib_call_init_function (vm, ip4_cli_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip4_source_check_init))) + return error; + + /* Set flow hash to something non-zero. */ + ip4_main.flow_hash_seed = 0xdeadbeef; + + /* Default TTL for packets we generate. 
*/ + ip4_main.host_config.ttl = 64; + + return error; +} + +VLIB_INIT_FUNCTION (ip4_init); diff --git a/vnet/vnet/ip/ip4_mtrie.c b/vnet/vnet/ip/ip4_mtrie.c new file mode 100644 index 00000000000..ed4a0d9f44f --- /dev/null +++ b/vnet/vnet/ip/ip4_mtrie.c @@ -0,0 +1,561 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_fib.h: ip4 mtrie fib + * + * Copyright (c) 2012 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +static void +ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, uword prefix_len) +{ + p->n_non_empty_leafs = ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves); + memset (p->dst_address_bits_of_leaves, prefix_len, sizeof (p->dst_address_bits_of_leaves)); + + /* Initialize leaves. */ +#ifdef CLIB_HAVE_VEC128 + { + u32x4 * l, init_x4; + +#ifndef __ALTIVEC__ + init_x4 = u32x4_splat (init); +#else + { + u32x4_union_t y; + y.as_u32[0] = init; + y.as_u32[1] = init; + y.as_u32[2] = init; + y.as_u32[3] = init; + init_x4 = y.as_u32x4; + } +#endif + + for (l = p->leaves_as_u32x4; l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4) + { + l[0] = init_x4; + l[1] = init_x4; + l[2] = init_x4; + l[3] = init_x4; + } + } +#else + { + u32 * l; + + for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) + { + l[0] = init; + l[1] = init; + l[2] = init; + l[3] = init; + } + } +#endif +} + +static ip4_fib_mtrie_leaf_t +ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, uword prefix_len) +{ + ip4_fib_mtrie_ply_t * p; + + /* Get cache aligned ply. 
*/ + pool_get_aligned (m->ply_pool, p, sizeof (p[0])); + + ply_init (p, init_leaf, prefix_len); + return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool); +} + +always_inline ip4_fib_mtrie_ply_t * +get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l) +{ + uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l); + /* It better not be the root ply. */ + ASSERT (n != 0); + return pool_elt_at_index (m->ply_pool, n); +} + +static void +ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +{ + uword i, is_root; + + is_root = p - m->ply_pool == 0; + + for (i = 0 ; i < ARRAY_LEN (p->leaves); i++) + { + ip4_fib_mtrie_leaf_t l = p->leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + ply_free (m, get_next_ply_for_leaf (m, l)); + } + + if (is_root) + ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0); + else + pool_put (m->ply_pool, p); +} + +void ip4_fib_free (ip4_fib_mtrie_t * m) +{ + ip4_fib_mtrie_ply_t * root_ply = pool_elt_at_index (m->ply_pool, 0); + ply_free (m, root_ply); +} + +u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst) +{ + ip4_fib_mtrie_ply_t * p = pool_elt_at_index (m->ply_pool, 0); + ip4_fib_mtrie_leaf_t l; + + l = p->leaves[dst.as_u8[0]]; + if (ip4_fib_mtrie_leaf_is_terminal (l)) + return ip4_fib_mtrie_leaf_get_adj_index (l); + + p = get_next_ply_for_leaf (m, l); + l = p->leaves[dst.as_u8[1]]; + if (ip4_fib_mtrie_leaf_is_terminal (l)) + return ip4_fib_mtrie_leaf_get_adj_index (l); + + p = get_next_ply_for_leaf (m, l); + l = p->leaves[dst.as_u8[2]]; + if (ip4_fib_mtrie_leaf_is_terminal (l)) + return ip4_fib_mtrie_leaf_get_adj_index (l); + + p = get_next_ply_for_leaf (m, l); + l = p->leaves[dst.as_u8[3]]; + + ASSERT (ip4_fib_mtrie_leaf_is_terminal (l)); + return ip4_fib_mtrie_leaf_get_adj_index (l); +} + +typedef struct { + ip4_address_t dst_address; + u32 dst_address_length; + u32 adj_index; +} ip4_fib_mtrie_set_unset_leaf_args_t; + +static void +set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_ply_t * ply, + ip4_fib_mtrie_leaf_t new_leaf, + uword new_leaf_dst_address_bits) +{ + ip4_fib_mtrie_leaf_t old_leaf; + uword i; + + ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf)); + ASSERT (! ip4_fib_mtrie_leaf_is_empty (new_leaf)); + + for (i = 0; i < ARRAY_LEN (ply->leaves); i++) + { + old_leaf = ply->leaves[i]; + + /* Recurse into sub plies. */ + if (! ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + ip4_fib_mtrie_ply_t * sub_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, sub_ply, new_leaf, new_leaf_dst_address_bits); + } + + /* Replace less specific terminal leaves with new leaf. */ + else if (new_leaf_dst_address_bits >= ply->dst_address_bits_of_leaves[i]) + { + ply->leaves[i] = new_leaf; + ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits; + ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf); + } + } +} + +static void +set_leaf (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_set_unset_leaf_args_t * a, + u32 old_ply_index, + u32 dst_address_byte_index) +{ + ip4_fib_mtrie_leaf_t old_leaf, new_leaf; + i32 n_dst_bits_next_plies; + u8 dst_byte; + + ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32); + ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + + n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); + + dst_byte = a->dst_address.as_u8[dst_address_byte_index]; + + /* Number of bits next plies <= 0 => insert leaves this ply. 
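   For example, inserting a /22 route descends through the root and second plies (22-8 = 14 and 22-16 = 6 bits still remain) and reaches this case at byte index 2, where 22-24 = -2, so the prefix is expanded into 1 << 2 = 4 consecutive leaves of the third ply.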
*/ + if (n_dst_bits_next_plies <= 0) + { + uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + + n_dst_bits_this_ply = -n_dst_bits_next_plies; + ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & pow2_mask (n_dst_bits_this_ply)) == 0); + + for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) + { + ip4_fib_mtrie_ply_t * old_ply, * new_ply; + + old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + + old_leaf = old_ply->leaves[i]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + /* Is leaf to be inserted more specific? */ + if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i]) + { + new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + if (old_leaf_is_terminal) + { + old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length; + old_ply->leaves[i] = new_leaf; + old_ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf); + ASSERT (old_ply->n_non_empty_leafs <= ARRAY_LEN (old_ply->leaves)); + } + else + { + /* Existing leaf points to another ply. We need to place new_leaf into all + more specific slots. */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, new_ply, new_leaf, a->dst_address_length); + } + } + + else if (! old_leaf_is_terminal) + { + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1); + } + } + } + else + { + ip4_fib_mtrie_ply_t * old_ply, * new_ply; + + old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + old_leaf = old_ply->leaves[dst_byte]; + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + new_leaf = ply_create (m, old_leaf, old_ply->dst_address_bits_of_leaves[dst_byte]); + new_ply = get_next_ply_for_leaf (m, new_leaf); + + /* Refetch since ply_create may move pool. */ + old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + + old_ply->leaves[dst_byte] = new_leaf; + old_ply->dst_address_bits_of_leaves[dst_byte] = 0; + + old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_leaf); + ASSERT (old_ply->n_non_empty_leafs >= 0); + + /* Account for the ply we just created. */ + old_ply->n_non_empty_leafs += 1; + } + else + new_ply = get_next_ply_for_leaf (m, old_leaf); + + set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1); + } +} + +static uword +unset_leaf (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_set_unset_leaf_args_t * a, + ip4_fib_mtrie_ply_t * old_ply, + u32 dst_address_byte_index) +{ + ip4_fib_mtrie_leaf_t old_leaf, del_leaf; + i32 n_dst_bits_next_plies; + uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + u8 dst_byte; + + ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32); + ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + + n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); + + dst_byte = a->dst_address.as_u8[dst_address_byte_index]; + if (n_dst_bits_next_plies < 0) + dst_byte &= ~pow2_mask (-n_dst_bits_next_plies); + + n_dst_bits_this_ply = n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0; + n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply); + + del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) + { + old_leaf = old_ply->leaves[i]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (old_leaf == del_leaf + || (! 
old_leaf_is_terminal + && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), dst_address_byte_index + 1))) + { + old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY; + old_ply->dst_address_bits_of_leaves[i] = 0; + + /* No matter what we just deleted a non-empty leaf. */ + ASSERT (! ip4_fib_mtrie_leaf_is_empty (old_leaf)); + old_ply->n_non_empty_leafs -= 1; + + ASSERT (old_ply->n_non_empty_leafs >= 0); + if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0) + { + pool_put (m->ply_pool, old_ply); + /* Old ply was deleted. */ + return 1; + } + } + } + + /* Old ply was not deleted. */ + return 0; +} + +void ip4_mtrie_init (ip4_fib_mtrie_t * m) +{ + ip4_fib_mtrie_leaf_t root; + memset (m, 0, sizeof (m[0])); + m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY; + root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, /* dst_address_bits_of_leaves */ 0); + ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0); +} + +void +ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, + ip4_address_t dst_address, + u32 dst_address_length, + u32 adj_index, + u32 is_del) +{ + ip4_fib_mtrie_t * m = &fib->mtrie; + ip4_fib_mtrie_ply_t * root_ply; + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t * im = &ip4_main; + + ASSERT(m->ply_pool != 0); + + root_ply = pool_elt_at_index (m->ply_pool, 0); + + /* Honor dst_address_length. Fib masks are in network byte order */ + dst_address.as_u32 &= im->fib_masks[dst_address_length]; + a.dst_address = dst_address; + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; + + if (! is_del) + { + if (dst_address_length == 0) + m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index); + else + set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); + } + else + { + if (dst_address_length == 0) + m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY; + + else + { + ip4_main_t * im = &ip4_main; + uword i; + + unset_leaf (m, &a, root_ply, 0); + + /* Find next less specific route and insert into mtrie. */ + for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= 1; i--) + { + uword * p; + ip4_address_t key; + + if (! fib->adj_index_by_dst_address[i]) + continue; + + key.as_u32 = dst_address.as_u32 & im->fib_masks[i]; + p = hash_get (fib->adj_index_by_dst_address[i], key.as_u32); + if (p) + { + a.dst_address = key; + a.dst_address_length = i; + a.adj_index = p[0]; + set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); + break; + } + } + } + } +} + +always_inline uword +maybe_remap_leaf (ip_lookup_main_t * lm, ip4_fib_mtrie_leaf_t * p) +{ + ip4_fib_mtrie_leaf_t l = p[0]; + uword was_remapped_to_empty_leaf = 0; + if (ip4_fib_mtrie_leaf_is_terminal (l)) + { + u32 adj_index = ip4_fib_mtrie_leaf_get_adj_index (l); + u32 m = vec_elt (lm->adjacency_remap_table, adj_index); + if (m) + { + was_remapped_to_empty_leaf = m == ~0; + if (was_remapped_to_empty_leaf) + p[0] = (was_remapped_to_empty_leaf + ? 
IP4_FIB_MTRIE_LEAF_EMPTY + : ip4_fib_mtrie_leaf_set_adj_index (m - 1)); + } + } + return was_remapped_to_empty_leaf; +} + +static void maybe_remap_ply (ip_lookup_main_t * lm, ip4_fib_mtrie_ply_t * ply) +{ + u32 n_remapped_to_empty = 0; + u32 i; + for (i = 0; i < ARRAY_LEN (ply->leaves); i++) + n_remapped_to_empty += maybe_remap_leaf (lm, &ply->leaves[i]); + if (n_remapped_to_empty > 0) + { + ASSERT (n_remapped_to_empty <= ply->n_non_empty_leafs); + ply->n_non_empty_leafs -= n_remapped_to_empty; + if (ply->n_non_empty_leafs == 0) + os_panic (); + } +} + +void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m) +{ + ip4_fib_mtrie_ply_t * ply; + pool_foreach (ply, m->ply_pool, maybe_remap_ply (lm, ply)); + maybe_remap_leaf (lm, &m->default_leaf); +} + +/* Returns number of bytes of memory used by mtrie. */ +static uword mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +{ + uword bytes, i; + + if (! p) + { + if (pool_is_free_index (m->ply_pool, 0)) + return 0; + p = pool_elt_at_index (m->ply_pool, 0); + } + + bytes = sizeof (p[0]); + for (i = 0 ; i < ARRAY_LEN (p->leaves); i++) + { + ip4_fib_mtrie_leaf_t l = p->leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l)); + } + + return bytes; +} + +static u8 * format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) +{ + ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t); + + if (ip4_fib_mtrie_leaf_is_empty (l)) + s = format (s, "miss"); + else if (ip4_fib_mtrie_leaf_is_terminal (l)) + s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l)); + else + s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l)); + return s; +} + +static u8 * format_ip4_fib_mtrie_ply (u8 * s, va_list * va) +{ + ip4_fib_mtrie_t * m = va_arg (*va, ip4_fib_mtrie_t *); + u32 base_address = va_arg (*va, u32); + u32 ply_index = va_arg (*va, u32); + u32 dst_address_byte_index = va_arg (*va, u32); + ip4_fib_mtrie_ply_t * p; + uword i, indent; + + p = pool_elt_at_index (m->ply_pool, ply_index); + indent = format_get_indent (s); + s = format (s, "ply index %d, %d non-empty leaves", ply_index, p->n_non_empty_leafs); + for (i = 0; i < ARRAY_LEN (p->leaves); i++) + { + ip4_fib_mtrie_leaf_t l = p->leaves[i]; + + if (! 
ip4_fib_mtrie_leaf_is_empty (l)) + { + u32 a, ia_length; + ip4_address_t ia; + + a = base_address + (i << (24 - 8*dst_address_byte_index)); + ia.as_u32 = clib_host_to_net_u32 (a); + if (ip4_fib_mtrie_leaf_is_terminal (l)) + ia_length = p->dst_address_bits_of_leaves[i]; + else + ia_length = 8*(1 + dst_address_byte_index); + s = format (s, "\n%U%20U %U", + format_white_space, indent + 2, + format_ip4_address_and_length, &ia, ia_length, + format_ip4_fib_mtrie_leaf, l); + + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + s = format (s, "\n%U%U", + format_white_space, indent + 2, + format_ip4_fib_mtrie_ply, m, a, + ip4_fib_mtrie_leaf_get_next_ply_index (l), + dst_address_byte_index + 1); + } + } + + return s; +} + +u8 * format_ip4_fib_mtrie (u8 * s, va_list * va) +{ + ip4_fib_mtrie_t * m = va_arg (*va, ip4_fib_mtrie_t *); + + s = format (s, "%d plies, memory usage %U", + pool_elts (m->ply_pool), + format_memory_size, mtrie_memory_usage (m, 0)); + + if (pool_elts (m->ply_pool) > 0) + { + ip4_address_t base_address; + base_address.as_u32 = 0; + s = format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0); + } + + return s; +} diff --git a/vnet/vnet/ip/ip4_mtrie.h b/vnet/vnet/ip/ip4_mtrie.h new file mode 100644 index 00000000000..31de41e14fa --- /dev/null +++ b/vnet/vnet/ip/ip4_mtrie.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_fib.h: ip4 mtrie fib + * + * Copyright (c) 2012 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_ip4_fib_h +#define included_ip_ip4_fib_h + +#include <vppinfra/cache.h> +#include <vppinfra/vector.h> +#include <vnet/ip/lookup.h> +#include <vnet/ip/ip4_packet.h> /* for ip4_address_t */ + +/* ip4 fib leafs: 4 ply 8-8-8-8 mtrie. + 1 + 2*adj_index for terminal leaves. + 0 + 2*next_ply_index for non-terminals. + 1 => empty (adjacency index of zero is special miss adjacency). 
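   For example, adjacency index 5 is encoded as the terminal leaf 1 + 2*5 = 11 (odd), a pointer to ply 7 as 0 + 2*7 = 14 (even), and the empty leaf is 1 + 2*0 = 1; decoding is a right shift by one in either case, with the low bit distinguishing terminal leaves from next-ply pointers.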
*/ +typedef u32 ip4_fib_mtrie_leaf_t; + +#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*IP_LOOKUP_MISS_ADJ_INDEX) +#define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0) + +always_inline u32 ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n) +{ return n == IP4_FIB_MTRIE_LEAF_EMPTY; } + +always_inline u32 ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_leaf_t n) +{ return n != IP4_FIB_MTRIE_LEAF_EMPTY; } + +always_inline u32 ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) +{ return n & 1; } + +always_inline u32 ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) +{ + ASSERT (ip4_fib_mtrie_leaf_is_terminal (n)); + return n >> 1; +} + +always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index) +{ + ip4_fib_mtrie_leaf_t l; + l = 1 + 2*adj_index; + ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index); + return l; +} + +always_inline u32 ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n) +{ return (n & 1) == 0; } + +always_inline u32 ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n) +{ + ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n)); + return n >> 1; +} + +always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) +{ + ip4_fib_mtrie_leaf_t l; + l = 0 + 2*i; + ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i); + return l; +} + +/* One ply of the 4 ply mtrie fib. */ +typedef struct { + union { + ip4_fib_mtrie_leaf_t leaves[256]; + +#ifdef CLIB_HAVE_VEC128 + u32x4 leaves_as_u32x4[256 / 4]; +#endif + }; + + /* Prefix length for terminal leaves. */ + u8 dst_address_bits_of_leaves[256]; + + /* Number of non-empty leafs (whether terminal or not). */ + i32 n_non_empty_leafs; + + /* Pad to cache line boundary. */ + u8 pad[CLIB_CACHE_LINE_BYTES + - 1 * sizeof (i32)]; +} ip4_fib_mtrie_ply_t; + +typedef struct { + /* Pool of plies. Index zero is root ply. */ + ip4_fib_mtrie_ply_t * ply_pool; + + /* Special case leaf for default route 0.0.0.0/0. */ + ip4_fib_mtrie_leaf_t default_leaf; +} ip4_fib_mtrie_t; + +void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m); + +struct ip4_fib_t; + +void ip4_fib_mtrie_add_del_route (struct ip4_fib_t * f, + ip4_address_t dst_address, + u32 dst_address_length, + u32 adj_index, + u32 is_del); + +/* Returns adjacency index. */ +u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst); + +void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m); + +format_function_t format_ip4_fib_mtrie; + +/* Lookup step. Processes 1 byte of 4 byte ip4 address. */ +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_leaf_t current_leaf, + ip4_address_t * dst_address, + u32 dst_address_byte_index) +{ + ip4_fib_mtrie_leaf_t next_leaf; + ip4_fib_mtrie_ply_t * ply; + uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf); + + ply = m->ply_pool + (current_is_terminal ? 0 : (current_leaf >> 1)); + next_leaf = ply->leaves[dst_address->as_u8[dst_address_byte_index]]; + next_leaf = current_is_terminal ? current_leaf : next_leaf; + + return next_leaf; +} + +#endif /* included_ip_ip4_fib_h */ diff --git a/vnet/vnet/ip/ip4_packet.h b/vnet/vnet/ip/ip4_packet.h new file mode 100644 index 00000000000..69467eb4e03 --- /dev/null +++ b/vnet/vnet/ip/ip4_packet.h @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip4/packet.h: ip4 packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip4_packet_h +#define included_ip4_packet_h + +#include <vnet/ip/ip_packet.h> /* for ip_csum_t */ +#include <vnet/ip/tcp_packet.h> /* for tcp_header_t */ +#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */ + +/* IP4 address which can be accessed either as 4 bytes + or as a 32-bit number. */ +typedef union { + u8 data[4]; + u32 data_u32; + /* Aliases. */ + u8 as_u8[4]; + u32 as_u32; +} ip4_address_t; + +typedef struct { + /* IP address must be first for ip_interface_address_get_address() to work */ + ip4_address_t ip4_addr; + u32 fib_index; +} ip4_address_fib_t; + +always_inline void +ip4_addr_fib_init (ip4_address_fib_t * addr_fib, ip4_address_t * address, + u32 fib_index) +{ + memcpy (&addr_fib->ip4_addr, address, sizeof (addr_fib->ip4_addr)); + addr_fib->fib_index = fib_index; +} + +/* (src,dst) pair of addresses as found in packet header. */ +typedef struct { + ip4_address_t src, dst; +} ip4_address_pair_t; + +/* If address is a valid netmask, return length of mask. */ +always_inline uword +ip4_address_netmask_length (ip4_address_t * a) +{ + uword result = 0; + uword i; + for (i = 0; i < ARRAY_LEN (a->as_u8); i++) + { + switch (a->as_u8[i]) + { + case 0xff: result += 8; break; + case 0xfe: result += 7; goto done; + case 0xfc: result += 6; goto done; + case 0xf8: result += 5; goto done; + case 0xf0: result += 4; goto done; + case 0xe0: result += 3; goto done; + case 0xc0: result += 2; goto done; + case 0x80: result += 1; goto done; + case 0x00: result += 0; goto done; + default: + /* Not a valid netmask mask. */ + return ~0; + } + } + done: + return result; +} + +typedef union { + struct { + /* 4 bit packet length (in 32bit units) and version VVVVLLLL. + e.g. for packets w/ no options ip_version_and_header_length == 0x45. */ + u8 ip_version_and_header_length; + + /* Type of service. */ + u8 tos; + + /* Total layer 3 packet length including this header. 
*/ + u16 length; + + /* Fragmentation ID. */ + u16 fragment_id; + + /* 3 bits of flags and 13 bits of fragment offset (in units + of 8 byte quantities). */ + u16 flags_and_fragment_offset; +#define IP4_HEADER_FLAG_MORE_FRAGMENTS (1 << 13) +#define IP4_HEADER_FLAG_DONT_FRAGMENT (1 << 14) +#define IP4_HEADER_FLAG_CONGESTION (1 << 15) + + /* Time to live decremented by router at each hop. */ + u8 ttl; + + /* Next level protocol packet. */ + u8 protocol; + + /* Checksum. */ + u16 checksum; + + /* Source and destination address. */ + union { + struct { + ip4_address_t src_address, dst_address; + }; + ip4_address_pair_t address_pair; + }; + }; + + /* For checksumming we'll want to access IP header in word sized chunks. */ + /* For 64 bit machines. */ + CLIB_PACKED (struct { + u64 checksum_data_64[2]; + u32 checksum_data_64_32[1]; + }); + + /* For 32 bit machines. */ + CLIB_PACKED (struct { + u32 checksum_data_32[5]; + }); +} ip4_header_t; + +/* Value of ip_version_and_header_length for packets w/o options. */ +#define IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS \ + ((4 << 4) | (sizeof (ip4_header_t) / sizeof (u32))) + +always_inline int +ip4_get_fragment_offset (ip4_header_t * i) +{ return clib_net_to_host_u16 (i->flags_and_fragment_offset) & 0x1fff; } + +always_inline int +ip4_get_fragment_more (ip4_header_t * i) +{ return clib_net_to_host_u16 (i->flags_and_fragment_offset) & IP4_HEADER_FLAG_MORE_FRAGMENTS; } + +always_inline int +ip4_is_fragment (ip4_header_t * i) +{ return (i->flags_and_fragment_offset & + clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS)); } + +always_inline int +ip4_is_first_fragment (ip4_header_t * i) +{ return (i->flags_and_fragment_offset & + clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS)) == + clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); } + +/* Fragment offset in bytes. */ +always_inline int +ip4_get_fragment_offset_bytes (ip4_header_t * i) +{ return 8 * ip4_get_fragment_offset (i); } + +always_inline int +ip4_header_bytes (ip4_header_t * i) +{ return sizeof (u32) * (i->ip_version_and_header_length & 0xf); } + +always_inline void * +ip4_next_header (ip4_header_t * i) +{ return (void *) i + ip4_header_bytes (i); } + +always_inline u16 +ip4_header_checksum (ip4_header_t * i) +{ + u16 save, csum; + ip_csum_t sum; + + save = i->checksum; + i->checksum = 0; + sum = ip_incremental_checksum (0, i, ip4_header_bytes (i)); + csum = ~ip_csum_fold (sum); + + i->checksum = save; + + /* Make checksum agree for special case where either + 0 or 0xffff would give same 1s complement sum. 
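   In one's complement arithmetic 0x0000 and 0xffff both represent zero, so a sender may legitimately have stored 0xffff where this computation yields 0; returning the stored value in that case keeps ip4_header_checksum_is_valid() true for such headers.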
*/ + if (csum == 0 && save == 0xffff) + csum = save; + + return csum; +} + +static inline uword +ip4_header_checksum_is_valid (ip4_header_t * i) +{ return i->checksum == ip4_header_checksum (i); } + +#define ip4_partial_header_checksum_x1(ip0,sum0) \ +do { \ + if (BITS (ip_csum_t) > 32) \ + { \ + sum0 = ip0->checksum_data_64[0]; \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64[1]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64_32[0]); \ + } \ + else \ + { \ + sum0 = ip0->checksum_data_32[0]; \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[1]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[2]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[3]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[4]); \ + } \ +} while (0) + +#define ip4_partial_header_checksum_x2(ip0,ip1,sum0,sum1) \ +do { \ + if (BITS (ip_csum_t) > 32) \ + { \ + sum0 = ip0->checksum_data_64[0]; \ + sum1 = ip1->checksum_data_64[0]; \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64[1]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_64[1]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64_32[0]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_64_32[0]); \ + } \ + else \ + { \ + sum0 = ip0->checksum_data_32[0]; \ + sum1 = ip1->checksum_data_32[0]; \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[1]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[1]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[2]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[2]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[3]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[3]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[4]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[4]); \ + } \ +} while (0) + +always_inline uword +ip4_address_is_multicast (ip4_address_t * a) +{ return (a->data[0] & 0xf0) == 0xe0; } + +always_inline void +ip4_multicast_address_set_for_group (ip4_address_t * a, ip_multicast_group_t g) +{ + ASSERT (g < (1 << 28)); + a->as_u32 = clib_host_to_net_u32 ((0xe << 28) + g); +} + +always_inline void +ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0) +{ + u32 src0, dst0; + + src0 = ip0->src_address.data_u32; + dst0 = ip0->dst_address.data_u32; + ip0->src_address.data_u32 = dst0; + ip0->dst_address.data_u32 = src0; + + src0 = tcp0->ports.src; + dst0 = tcp0->ports.dst; + tcp0->ports.src = dst0; + tcp0->ports.dst = src0; +} + +always_inline void +ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1, + tcp_header_t * tcp0, tcp_header_t * tcp1) +{ + u32 src0, dst0, src1, dst1; + + src0 = ip0->src_address.data_u32; + src1 = ip1->src_address.data_u32; + dst0 = ip0->dst_address.data_u32; + dst1 = ip1->dst_address.data_u32; + ip0->src_address.data_u32 = dst0; + ip1->src_address.data_u32 = dst1; + ip0->dst_address.data_u32 = src0; + ip1->dst_address.data_u32 = src1; + + src0 = tcp0->ports.src; + src1 = tcp1->ports.src; + dst0 = tcp0->ports.dst; + dst1 = tcp1->ports.dst; + tcp0->ports.src = dst0; + tcp1->ports.src = dst1; + tcp0->ports.dst = src0; + tcp1->ports.dst = src1; +} + +#endif /* included_ip4_packet_h */ diff --git a/vnet/vnet/ip/ip4_pg.c b/vnet/vnet/ip/ip4_pg.c new file mode 100644 index 00000000000..9710d8d4c5a --- /dev/null +++ b/vnet/vnet/ip/ip4_pg.c @@ -0,0 +1,387 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_pg: IP v4 packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +#define IP4_PG_EDIT_CHECKSUM (1 << 0) +#define IP4_PG_EDIT_LENGTH (1 << 1) + +static_always_inline void +compute_length_and_or_checksum (vlib_main_t * vm, + u32 * packets, + u32 n_packets, + u32 ip_header_offset, + u32 flags) +{ + ASSERT (flags != 0); + + while (n_packets >= 2) + { + u32 pi0, pi1; + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + ip_csum_t sum0, sum1; + + pi0 = packets[0]; + pi1 = packets[1]; + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + n_packets -= 2; + packets += 2; + + ip0 = (void *) (p0->data + ip_header_offset); + ip1 = (void *) (p1->data + ip_header_offset); + + if (flags & IP4_PG_EDIT_LENGTH) + { + ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - ip_header_offset); + ip1->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) - ip_header_offset); + } + + if (flags & IP4_PG_EDIT_CHECKSUM) + { + ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0])); + ASSERT (ip4_header_bytes (ip1) == sizeof (ip1[0])); + + ip0->checksum = 0; + ip1->checksum = 0; + + ip4_partial_header_checksum_x2 (ip0, ip1, sum0, sum1); + ip0->checksum = ~ ip_csum_fold (sum0); + ip1->checksum = ~ ip_csum_fold (sum1); + + ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + ASSERT (ip1->checksum == ip4_header_checksum (ip1)); + } + } + + while (n_packets >= 1) + { + u32 pi0; + vlib_buffer_t * p0; + ip4_header_t * ip0; + ip_csum_t sum0; + + pi0 = packets[0]; + p0 = vlib_get_buffer (vm, pi0); + n_packets -= 1; + packets += 1; + + ip0 = (void *) (p0->data + ip_header_offset); + + if (flags & IP4_PG_EDIT_LENGTH) + ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - ip_header_offset); + + if (flags & IP4_PG_EDIT_CHECKSUM) + { + 
ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0])); + + ip0->checksum = 0; + + ip4_partial_header_checksum_x1 (ip0, sum0); + ip0->checksum = ~ ip_csum_fold (sum0); + + ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + } + } +} + +static void +ip4_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = pg->vlib_main; + u32 ip_offset; + + ip_offset = g->start_byte_offset; + + switch (g->edit_function_opaque) + { + case IP4_PG_EDIT_LENGTH: + compute_length_and_or_checksum (vm, packets, n_packets, ip_offset, + IP4_PG_EDIT_LENGTH); + break; + + case IP4_PG_EDIT_CHECKSUM: + compute_length_and_or_checksum (vm, packets, n_packets, ip_offset, + IP4_PG_EDIT_CHECKSUM); + break; + + case IP4_PG_EDIT_LENGTH | IP4_PG_EDIT_CHECKSUM: + compute_length_and_or_checksum (vm, packets, n_packets, ip_offset, + IP4_PG_EDIT_LENGTH + | IP4_PG_EDIT_CHECKSUM); + break; + + default: + ASSERT (0); + break; + } +} + +typedef struct { + pg_edit_t ip_version, header_length; + pg_edit_t tos; + pg_edit_t length; + + pg_edit_t fragment_id, fragment_offset; + + /* Flags together with fragment offset. */ + pg_edit_t mf_flag, df_flag, ce_flag; + + pg_edit_t ttl; + + pg_edit_t protocol; + + pg_edit_t checksum; + + pg_edit_t src_address, dst_address; +} pg_ip4_header_t; + +static inline void +pg_ip4_header_init (pg_ip4_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, ip4_header_t, f); + _ (tos); + _ (length); + _ (fragment_id); + _ (ttl); + _ (protocol); + _ (checksum); + _ (src_address); + _ (dst_address); +#undef _ + + /* Initialize bit fields. */ + pg_edit_init_bitfield (&p->header_length, ip4_header_t, + ip_version_and_header_length, + 0, 4); + pg_edit_init_bitfield (&p->ip_version, ip4_header_t, + ip_version_and_header_length, + 4, 4); + + pg_edit_init_bitfield (&p->fragment_offset, ip4_header_t, + flags_and_fragment_offset, + 0, 13); + pg_edit_init_bitfield (&p->mf_flag, ip4_header_t, + flags_and_fragment_offset, + 13, 1); + pg_edit_init_bitfield (&p->df_flag, ip4_header_t, + flags_and_fragment_offset, + 14, 1); + pg_edit_init_bitfield (&p->ce_flag, ip4_header_t, + flags_and_fragment_offset, + 15, 1); +} + +uword +unformat_pg_ip4_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_ip4_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip4_header_t), + &group_index); + pg_ip4_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->ip_version, 4); + pg_edit_set_fixed (&p->header_length, + sizeof (ip4_header_t) / sizeof (u32)); + + pg_edit_set_fixed (&p->tos, 0); + pg_edit_set_fixed (&p->ttl, 64); + + pg_edit_set_fixed (&p->fragment_id, 0); + pg_edit_set_fixed (&p->fragment_offset, 0); + pg_edit_set_fixed (&p->mf_flag, 0); + pg_edit_set_fixed (&p->df_flag, 0); + pg_edit_set_fixed (&p->ce_flag, 0); + + p->length.type = PG_EDIT_UNSPECIFIED; + p->checksum.type = PG_EDIT_UNSPECIFIED; + + if (unformat (input, "%U: %U -> %U", + unformat_pg_edit, + unformat_ip_protocol, &p->protocol, + unformat_pg_edit, + unformat_ip4_address, &p->src_address, + unformat_pg_edit, + unformat_ip4_address, &p->dst_address)) + goto found; + + if (! unformat (input, "%U:", + unformat_pg_edit, + unformat_ip_protocol, &p->protocol)) + goto error; + +found: + /* Parse options. 
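   That is, the optional version, header-length, tos, length, checksum, ttl, fragment id/offset and mf/df/ce keywords handled below, in any order, until an unrecognized token is handed to the next protocol layer. An illustrative stream fragment (hypothetical values) might read: udp: 1.2.3.4 -> 5.6.7.8 ttl 64 df.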
*/ + while (1) + { + if (unformat (input, "version %U", + unformat_pg_edit, + unformat_pg_number, &p->ip_version)) + ; + + else if (unformat (input, "header-length %U", + unformat_pg_edit, + unformat_pg_number, &p->header_length)) + ; + + else if (unformat (input, "tos %U", + unformat_pg_edit, + unformat_pg_number, &p->tos)) + ; + + else if (unformat (input, "length %U", + unformat_pg_edit, + unformat_pg_number, &p->length)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + else if (unformat (input, "ttl %U", + unformat_pg_edit, + unformat_pg_number, &p->ttl)) + ; + + else if (unformat (input, "fragment id %U offset %U", + unformat_pg_edit, + unformat_pg_number, &p->fragment_id, + unformat_pg_edit, + unformat_pg_number, &p->fragment_offset)) + { + int i; + for (i = 0; i< ARRAY_LEN (p->fragment_offset.values); i++) + pg_edit_set_value (&p->fragment_offset, i, + pg_edit_get_value (&p->fragment_offset, i) / 8); + + } + + /* Flags. */ + else if (unformat (input, "mf") || unformat (input, "MF")) + pg_edit_set_fixed (&p->mf_flag, 1); + + else if (unformat (input, "df") || unformat (input, "DF")) + pg_edit_set_fixed (&p->df_flag, 1); + + else if (unformat (input, "ce") || unformat (input, "CE")) + pg_edit_set_fixed (&p->ce_flag, 1); + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t * im = &ip_main; + ip_protocol_t protocol; + ip_protocol_info_t * pi; + + pi = 0; + if (p->protocol.type == PG_EDIT_FIXED) + { + protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO); + pi = ip_get_protocol_info (im, protocol); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->length.type == PG_EDIT_UNSPECIFIED + && s->min_packet_bytes == s->max_packet_bytes + && group_index + 1 < vec_len (s->edit_groups)) + { + pg_edit_set_fixed (&p->length, + pg_edit_group_n_bytes (s, group_index)); + } + + /* Compute IP header checksum if all edits are fixed. */ + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + ip4_header_t fixed_header, fixed_mask, cmp_mask; + + /* See if header is all fixed and specified except for + checksum field. */ + memset (&cmp_mask, ~0, sizeof (cmp_mask)); + cmp_mask.checksum = 0; + + pg_edit_group_get_fixed_packet_data (s, group_index, + &fixed_header, &fixed_mask); + if (! memcmp (&fixed_mask, &cmp_mask, sizeof (cmp_mask))) + pg_edit_set_fixed (&p->checksum, + clib_net_to_host_u16 (ip4_header_checksum (&fixed_header))); + } + + p = pg_get_edit_group (s, group_index); + if (p->length.type == PG_EDIT_UNSPECIFIED + || p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = ip4_pg_edit_function; + g->edit_function_opaque = 0; + if (p->length.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= IP4_PG_EDIT_LENGTH; + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= IP4_PG_EDIT_CHECKSUM; + } + + return 1; + } + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + diff --git a/vnet/vnet/ip/ip4_source_check.c b/vnet/vnet/ip/ip4_source_check.c new file mode 100644 index 00000000000..47e22f2392e --- /dev/null +++ b/vnet/vnet/ip/ip4_source_check.c @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_source_check.c: IP v4 check source address (unicast RPF check) + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vnet/ip/ip.h> + +typedef struct { + u8 packet_data[64]; +} ip4_source_check_trace_t; + +static u8 * format_ip4_source_check_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ip4_source_check_trace_t * t = va_arg (*va, ip4_source_check_trace_t *); + + s = format (s, "%U", + format_ip4_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + +typedef enum { + IP4_SOURCE_CHECK_NEXT_DROP, + IP4_SOURCE_CHECK_N_NEXT, +} ip4_source_check_next_t; + +typedef enum { + IP4_SOURCE_CHECK_REACHABLE_VIA_RX, + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY, +} ip4_source_check_type_t; + +typedef union { + struct { + u32 no_default_route : 1; + u32 fib_index : 31; + }; + u32 as_u32[1]; +} ip4_source_check_config_t; + +always_inline uword +ip4_source_check_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + ip4_source_check_type_t source_check_type) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip_config_main_t * cm = &lm->rx_config_mains[VNET_UNICAST]; + u32 n_left_from, * from, * to_next; + u32 next_index; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (ip4_source_check_trace_t)); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_source_check_config_t * c0, * c1; + ip_adjacency_t * adj0, * adj1; + u32 pi0, next0, pass0, adj_index0; + u32 pi1, next1, pass1, adj_index1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + c0 = vnet_get_config_data (&cm->config_main, + &vnet_buffer (p0)->ip.current_config_index, + &next0, + sizeof (c0[0])); + c1 = vnet_get_config_data (&cm->config_main, + &vnet_buffer (p1)->ip.current_config_index, + &next1, + sizeof (c1[0])); + + mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie; + mtrie1 = &vec_elt_at_index (im->fibs, c1->fib_index)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); + + adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + + ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index, + &ip0->src_address, + c0->no_default_route)); + ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, c1->fib_index, + &ip1->src_address, + c1->no_default_route)); + + adj0 = ip_get_adjacency (lm, adj_index0); + adj1 = ip_get_adjacency (lm, adj_index1); + + /* Pass multicast. */ + pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); + pass1 = ip4_address_is_multicast (&ip1->src_address) || ip1->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); + + pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE + && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY + || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index)); + pass1 |= (adj1->lookup_next_index == IP_LOOKUP_NEXT_REWRITE + && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY + || vnet_buffer (p1)->sw_if_index[VLIB_RX] == adj1->rewrite_header.sw_if_index)); + + next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP); + next1 = (pass1 ? 
next1 : IP4_SOURCE_CHECK_NEXT_DROP); + + p0->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS]; + p1->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS]; + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + ip4_fib_mtrie_t * mtrie0; + ip4_fib_mtrie_leaf_t leaf0; + ip4_source_check_config_t * c0; + ip_adjacency_t * adj0; + u32 pi0, next0, pass0, adj_index0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip0 = vlib_buffer_get_current (p0); + + c0 = vnet_get_config_data (&cm->config_main, + &vnet_buffer (p0)->ip.current_config_index, + &next0, + sizeof (c0[0])); + + mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie; + + leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + + adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + + ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index, + &ip0->src_address, + c0->no_default_route)); + adj0 = ip_get_adjacency (lm, adj_index0); + + /* Pass multicast. */ + pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); + + pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE + && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY + || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index)); + + next0 = (pass0 ? 
next0 : IP4_SOURCE_CHECK_NEXT_DROP); + p0->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +ip4_source_check_reachable_via_any (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_source_check_inline (vm, node, frame, IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); +} + +static uword +ip4_source_check_reachable_via_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_source_check_inline (vm, node, frame, IP4_SOURCE_CHECK_REACHABLE_VIA_RX); +} + +VLIB_REGISTER_NODE (ip4_check_source_reachable_via_any) = { + .function = ip4_source_check_reachable_via_any, + .name = "ip4-source-check-via-any", + .vector_size = sizeof (u32), + + .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT, + .next_nodes = { + [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop", + }, + + .format_buffer = format_ip4_header, + .format_trace = format_ip4_source_check_trace, +}; + +VLIB_REGISTER_NODE (ip4_check_source_reachable_via_rx) = { + .function = ip4_source_check_reachable_via_rx, + .name = "ip4-source-check-via-rx", + .vector_size = sizeof (u32), + + .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT, + .next_nodes = { + [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop", + }, + + .format_buffer = format_ip4_header, + .format_trace = format_ip4_source_check_trace, +}; + +static clib_error_t * +set_ip_source_check (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip_config_main_t * rx_cm = &lm->rx_config_mains[VNET_UNICAST]; + clib_error_t * error = 0; + u32 sw_if_index, is_del, ci; + ip4_source_check_config_t config; + ip4_rx_feature_type_t type; + + sw_if_index = ~0; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + is_del = 0; + config.no_default_route = 0; + config.fib_index = im->fib_index_by_sw_if_index[sw_if_index]; + type = IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX; + if (unformat (input, "del")) + is_del = 1; + + ci = rx_cm->config_index_by_sw_if_index[sw_if_index]; + ci = (is_del + ? vnet_config_del_feature + : vnet_config_add_feature) + (vm, &rx_cm->config_main, + ci, + type, + &config, + sizeof (config)); + rx_cm->config_index_by_sw_if_index[sw_if_index] = ci; + + done: + return error; +} + +VLIB_CLI_COMMAND (set_interface_ip_source_check_command, static) = { + .path = "set interface ip source-check", + .function = set_ip_source_check, + .short_help = "Set IP4/IP6 interface unicast source check", +}; + +/* Dummy init function to get us linked in. */ +clib_error_t * ip4_source_check_init (vlib_main_t * vm) +{ return 0; } + +VLIB_INIT_FUNCTION (ip4_source_check_init); diff --git a/vnet/vnet/ip/ip4_test.c b/vnet/vnet/ip/ip4_test.c new file mode 100644 index 00000000000..ff088e78f3e --- /dev/null +++ b/vnet/vnet/ip/ip4_test.c @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +/* + * ip4 FIB tester. Add, probe, delete a bunch of + * random routes / masks and make sure that the mtrie agrees with + * the hash-table FIB. + * + * Manipulate the FIB by means of the debug CLI commands, to minimize + * the chances of doing something idiotic. + */ + +/* + * These routines need to be redeclared non-static elsewhere. + * + * Also: rename ip_route() -> vnet_ip_route_cmd() and add the usual + * test_route_init() call to main.c + */ +clib_error_t * +vnet_ip_route_cmd (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd_arg); + +int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0); + +ip4_fib_t * +find_fib_by_table_index_or_id (ip4_main_t * im, u32 table_index_or_id, + u32 flags); + +/* Routes to insert/delete/probe in FIB */ +typedef struct { + ip4_address_t address; + u32 mask_width; + u32 interface_id; /* not an xx_if_index */ +} test_route_t; + +typedef struct { + /* Test routes in use */ + test_route_t *route_pool; + + /* Number of fake ethernets created */ + u32 test_interfaces_created; +} test_main_t; + +test_main_t test_main; + +/* fake ethernet device class, distinct from "fake-ethX" */ +static u8 * format_test_interface_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "test-eth%d", dev_instance); +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (test_interface_device_class,static) = { + .name = "Test interface", + .format_device_name = format_test_interface_name, + .tx_function = dummy_interface_tx, +}; + +static clib_error_t * +thrash (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd_arg) +{ + u32 seed = 0xdeaddabe; + u32 niter = 10; + u32 nroutes = 10; + u32 ninterfaces = 4; + f64 min_mask_bits = 7.0; + f64 max_mask_bits = 32.0; + u32 table_id = 11; /* my amp goes to 11 (use fib 11) */ + u32 table_index; + int iter, i; + u8 * cmd; + test_route_t *tr; + test_main_t *tm = &test_main; + ip4_main_t * im = &ip4_main; + vnet_main_t * vnm = vnet_get_main(); + unformat_input_t cmd_input; + f64 rf; + u32 *masks = 0; + u32 tmp; + u32 hw_if_index; + clib_error_t * error = 0; + uword *p; + unformat_input_t _line_input, * line_input = &_line_input; + u8 hw_address[6]; + ip4_fib_t * fib; + int verbose = 0; + + /* Precompute mask width -> mask vector */ + tmp = (u32)~0; + vec_validate (masks, 32); + for (i = 32; i > 0; i--) + { + masks [i] = tmp; + tmp <<= 1; + } + + if (unformat_user (main_input, unformat_line_input, line_input)) + { + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "seed %d", &seed)) + ; + else if (unformat (line_input, "niter %d", &niter)) + ; + else if (unformat (line_input, "nroutes %d", &nroutes)) + ; + else if (unformat (line_input, "ninterfaces %d", &ninterfaces)) + ; + else if (unformat (line_input, "min-mask-bits %d", &tmp)) + min_mask_bits = 
(f64) tmp; + else if (unformat (line_input, "max-mask-bits %d", &tmp)) + max_mask_bits = (f64) tmp; + else if (unformat (line_input, "verbose")) + verbose = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + } + } + + /* Find or create FIB table 11 */ + fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID); + + for (i = tm->test_interfaces_created; i < ninterfaces; i++) + { + vnet_hw_interface_t * hw; + memset (hw_address, 0, sizeof (hw_address)); + hw_address[0] = 0xd0; + hw_address[1] = 0x0f; + hw_address[5] = i; + + error = ethernet_register_interface + (vnm, + test_interface_device_class.index, + i /* instance */, + hw_address, + &hw_if_index, + /* flag change */ 0); + + /* Fake interfaces use FIB table 11 */ + hw = vnet_get_hw_interface (vnm, hw_if_index); + vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index); + im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index; + } + + tm->test_interfaces_created = ninterfaces; + + /* Find fib index corresponding to FIB id 11 */ + p = hash_get (im->fib_index_by_table_id, table_id); + if (p == 0) + { + vlib_cli_output (vm, "Couldn't map fib id %d to fib index\n", + table_id); + return 0; + } + table_index = p[0]; + + for (iter = 0; iter < niter; iter++) + { + /* Pick random routes to install */ + for (i = 0; i < nroutes; i++) + { + int j; + + pool_get (tm->route_pool, tr); + memset (tr, 0, sizeof (*tr)); + + again: + rf = random_f64 (&seed); + tr->mask_width = (u32) (min_mask_bits + + rf * (max_mask_bits - min_mask_bits)); + tmp = random_u32 (&seed); + tmp &= masks[tr->mask_width]; + tr->address.as_u32 = clib_host_to_net_u32(tmp); + + /* We can't add the same address/mask twice... */ + for (j = 0; j < i; j++) + { + test_route_t *prev; + prev = pool_elt_at_index (tm->route_pool, j); + if ((prev->address.as_u32 == tr->address.as_u32) + && (prev->mask_width == tr->mask_width)) + goto again; + } + + rf = random_f64 (&seed); + tr->interface_id = (u32) (rf * ninterfaces); + } + + /* Add them */ + for (i = 0; i < nroutes; i++) + { + tr = pool_elt_at_index (tm->route_pool, i); + cmd = format (0, "add table %d %U/%d via test-eth%d", + table_id, + format_ip4_address, &tr->address, + tr->mask_width, tr->interface_id); + vec_add1(cmd,0); + if (verbose) + fformat(stderr, "ip route %s\n", cmd); + unformat_init_string (&cmd_input, (char *) cmd, vec_len(cmd)-1); + error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg); + if (error) + clib_error_report(error); + unformat_free (&cmd_input); + vec_free(cmd); + } + /* Probe them */ + for (i = 0; i < nroutes; i++) + { + tr = pool_elt_at_index (tm->route_pool, i); + if (!ip4_lookup_validate (&tr->address, table_index)) + { + if (verbose) + fformat (stderr, "test lookup table %d %U\n", + table_index, format_ip4_address, &tr->address); + + fformat (stderr, "FAIL-after-insert: %U/%d\n", + format_ip4_address, &tr->address, + tr->mask_width); + } + } + + /* Delete them */ + for (i = 0; i < nroutes; i++) + { + int j; + tr = pool_elt_at_index (tm->route_pool, i); + if (0) + cmd = format (0, "del table %d %U/%d via test-eth%d", + table_id, + format_ip4_address, &tr->address, + tr->mask_width, tr->interface_id); + else + cmd = format (0, "del table %d %U/%d", + table_id, + format_ip4_address, &tr->address, + tr->mask_width); + vec_add1(cmd,0); + if (verbose) + fformat(stderr, "ip route %s\n", cmd); + unformat_init_string (&cmd_input, (char *) cmd, vec_len(cmd)-1); + error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg); + if (error) + 
clib_error_report(error); + unformat_free (&cmd_input); + vec_free(cmd); + + /* Make sure all undeleted routes still work */ + for (j = i+1; j < nroutes; j++) + { + test_route_t *rr; /* remaining route */ + rr = pool_elt_at_index (tm->route_pool, j); + if (!ip4_lookup_validate (&rr->address, table_index)) + { + if (verbose) + fformat (stderr, "test lookup table %d %U\n", + table_index, format_ip4_address, &rr->address); + + fformat (stderr, "FAIL: %U/%d AWOL\n", + format_ip4_address, &rr->address, + rr->mask_width); + fformat (stderr, " iter %d after %d of %d deletes\n", + iter, i, nroutes); + fformat (stderr, " last route deleted %U/%d\n", + format_ip4_address, &tr->address, + tr->mask_width); + } + } + } + + pool_free (tm->route_pool); + } + return 0; +} + +VLIB_CLI_COMMAND (test_route_command, static) = { + .path = "test route", + .short_help = "test route", + .function = thrash, +}; + +clib_error_t *test_route_init (vlib_main_t *vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (test_route_init); diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h new file mode 100644 index 00000000000..a5c322a2fa5 --- /dev/null +++ b/vnet/vnet/ip/ip6.h @@ -0,0 +1,503 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip6.h: ip6 main include file + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_ip6_h +#define included_ip_ip6_h + +#include <vlib/mc.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/lookup.h> + +#include <vppinfra/bihash_24_8.h> +#include <vppinfra/bihash_template.h> + +/* + * Default size of the ip6 fib hash table + */ +#define IP6_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) +#define IP6_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20) + +typedef struct { + ip6_address_t addr; + u32 dst_address_length; + u32 vrf_index; +} ip6_fib_key_t; + +typedef struct { + /* Table ID (hash key) for this FIB. 
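+ *
+ * table_id is the user-visible FIB number; index (below) is the position of
+ * this entry in im->fibs.  The two are related through the
+ * im->fib_index_by_table_id hash, so mapping an id to a fib is typically
+ * (sketch, using helpers that appear later in this change):
+ *
+ *   uword *p = hash_get (im->fib_index_by_table_id, table_id);
+ *   ip6_fib_t *fib = p ? vec_elt_at_index (im->fibs, p[0]) : 0;
+ *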
*/ + u32 table_id; + + /* Index into FIB vector. */ + u32 index; + + /* flow hash configuration */ + u32 flow_hash_config; +} ip6_fib_t; + +struct ip6_main_t; + +typedef void (ip6_add_del_route_function_t) + (struct ip6_main_t * im, + uword opaque, + ip6_fib_t * fib, + u32 flags, + ip6_address_t * address, + u32 address_length, + void * old_result, + void * new_result); + +typedef struct { + ip6_add_del_route_function_t * function; + uword required_flags; + uword function_opaque; +} ip6_add_del_route_callback_t; + +typedef void (ip6_add_del_interface_address_function_t) + (struct ip6_main_t * im, + uword opaque, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_del); + +typedef struct { + ip6_add_del_interface_address_function_t * function; + uword function_opaque; +} ip6_add_del_interface_address_callback_t; + +typedef enum { + /* First check access list to either permit or deny this + packet based on classification. */ + IP6_RX_FEATURE_CHECK_ACCESS, + + /* RPF check: verify that source address is reachable via + RX interface or via any interface. */ + IP6_RX_FEATURE_CHECK_SOURCE_REACHABLE_VIA_RX, + IP6_RX_FEATURE_CHECK_SOURCE_REACHABLE_VIA_ANY, + + /* IPSec */ + IP6_RX_FEATURE_IPSEC, + + /* Intercept and decap L2TPv3 packets. */ + IP6_RX_FEATURE_L2TPV3, + + /* vPath forwarding: won't return to call next feature + so any feature needed before vPath forwarding must be prior + to this entry */ + IP6_RX_FEATURE_VPATH, + + /* Must be last: perform forwarding lookup. */ + IP6_RX_FEATURE_LOOKUP, + + IP6_N_RX_FEATURE, +} ip6_rx_feature_type_t; + +typedef struct ip6_main_t { + BVT(clib_bihash) ip6_lookup_table; + + ip_lookup_main_t lookup_main; + + /* bitmap / refcounts / vector of mask widths to search */ + uword * non_empty_dst_address_length_bitmap; + u8 * prefix_lengths_in_search_order; + i32 dst_address_length_refcounts[129]; + + /* Vector of FIBs. */ + ip6_fib_t * fibs; + + ip6_address_t fib_masks[129]; + + /* Table index indexed by software interface. */ + u32 * fib_index_by_sw_if_index; + + /* Hash table mapping table id to fib index. + ID space is not necessarily dense; index space is dense. */ + uword * fib_index_by_table_id; + + /* Vector of functions to call when routes are added/deleted. */ + ip6_add_del_route_callback_t * add_del_route_callbacks; + + /* Hash table mapping interface rewrite adjacency index by sw if index. */ + uword * interface_route_adj_index_by_sw_if_index; + + /* Functions to call when interface address changes. */ + ip6_add_del_interface_address_callback_t * add_del_interface_address_callbacks; + + /* Template used to generate IP6 neighbor solicitation packets. */ + vlib_packet_template_t discover_neighbor_packet_template; + + u32 * discover_neighbor_next_index_by_hw_if_index; + + /* ip6 lookup table config parameters */ + u32 lookup_table_nbuckets; + uword lookup_table_size; + + /* Seed for Jenkins hash used to compute ip6 flow hash. */ + u32 flow_hash_seed; + + struct { + /* TTL to use for host generated packets. */ + u8 ttl; + + u8 pad[3]; + } host_config; +} ip6_main_t; + +/* Global ip6 main structure. */ +extern ip6_main_t ip6_main; + +/* Global ip6 input node. Errors get attached to ip6 input node. 
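+ *
+ * Nodes that drop ip6 packets reuse the error counters owned by ip6-input,
+ * in the same way the ip4 source-check node earlier in this change reuses
+ * those of ip4-input.  A sketch of the pattern inside a node function:
+ *
+ *   vlib_node_runtime_t *error_node =
+ *     vlib_node_get_runtime (vm, ip6_input_node.index);
+ *   ...
+ *   b0->error = error_node->errors[IP6_ERROR_TIME_EXPIRED];
+ *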
*/ +extern vlib_node_registration_t ip6_input_node; +extern vlib_node_registration_t ip6_rewrite_node; +extern vlib_node_registration_t ip6_discover_neighbor_node; + +extern vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node; + +/* ipv6 neighbor discovery - timer/event types */ +typedef enum { + ICMP6_ND_EVENT_INIT, +} ip6_icmp_neighbor_discovery_event_type_t; + +typedef union { + u32 add_del_swindex; + struct { + u32 up_down_swindex; + u32 fib_index; + } up_down_event; +} ip6_icmp_neighbor_discovery_event_data_t; + +u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst); +u32 ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, + ip6_address_t * dst); +ip6_fib_t * find_ip6_fib_by_table_index_or_id (ip6_main_t * im, + u32 table_index_or_id, + u32 flags); + +always_inline uword +ip6_destination_matches_route (ip6_main_t * im, + ip6_address_t * key, + ip6_address_t * dest, + uword dest_length) +{ + int i; + for (i = 0; i < ARRAY_LEN (key->as_uword); i++) + { + if ((key->as_uword[i] ^ dest->as_uword[i]) & im->fib_masks[dest_length].as_uword[i]) + return 0; + } + return 1; +} + +always_inline uword +ip6_destination_matches_interface (ip6_main_t * im, + ip6_address_t * key, + ip_interface_address_t * ia) +{ + ip6_address_t * a = ip_interface_address_get_address (&im->lookup_main, ia); + return ip6_destination_matches_route (im, key, a, ia->address_length); +} + +/* As above but allows for unaligned destinations (e.g. works right from IP header of packet). */ +always_inline uword +ip6_unaligned_destination_matches_route (ip6_main_t * im, + ip6_address_t * key, + ip6_address_t * dest, + uword dest_length) +{ + int i; + for (i = 0; i < ARRAY_LEN (key->as_uword); i++) + { + if ((clib_mem_unaligned (&key->as_uword[i], uword) ^ dest->as_uword[i]) & im->fib_masks[dest_length].as_uword[i]) + return 0; + } + return 1; +} + +always_inline void +ip6_src_address_for_packet (ip6_main_t * im, vlib_buffer_t * p, ip6_address_t * src, u32 sw_if_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = ip_interface_address_for_packet (lm, p, sw_if_index); + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + *src = a[0]; +} + +always_inline u32 +ip6_src_lookup_for_packet (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i) +{ + if (vnet_buffer (b)->ip.adj_index[VLIB_RX] == ~0) + vnet_buffer (b)->ip.adj_index[VLIB_RX] + = ip6_fib_lookup (im, vnet_buffer (b)->sw_if_index[VLIB_RX], + &i->src_address); + return vnet_buffer (b)->ip.adj_index[VLIB_RX]; +} + +/* Find interface address which matches destination. */ +always_inline ip6_address_t * +ip6_interface_address_matching_destination (ip6_main_t * im, ip6_address_t * dst, u32 sw_if_index, + ip_interface_address_t ** result_ia) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia; + ip6_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + if (ip6_destination_matches_route (im, dst, a, ia->address_length)) + { + result = a; + break; + } + })); + if (result_ia) + *result_ia = result ? ia : 0; + return result; +} + +clib_error_t * +ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, + ip6_address_t * address, u32 address_length, + u32 is_del); + +int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2); + +/* Add/del a route to the FIB. 
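+ *
+ * Routes are added or deleted by filling in ip6_add_del_route_args_t
+ * (defined below) and calling ip6_add_del_route().  A hedged sketch of a
+ * delete by prefix, along the lines of what ip6_delete_matching_routes()
+ * does later in this change (the concrete values are illustrative):
+ *
+ *   ip6_add_del_route_args_t a;
+ *   memset (&a, 0, sizeof (a));
+ *   a.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_TABLE_ID;
+ *   a.table_index_or_table_id = 0;   // table id, not fib index
+ *   a.dst_address = dst;             // ip6_address_t of the prefix
+ *   a.dst_address_length = 64;
+ *   a.adj_index = ~0;
+ *   ip6_add_del_route (&ip6_main, &a);
+ *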
*/ + +#define IP6_ROUTE_FLAG_ADD (0 << 0) +#define IP6_ROUTE_FLAG_DEL (1 << 0) +#define IP6_ROUTE_FLAG_TABLE_ID (0 << 1) +#define IP6_ROUTE_FLAG_FIB_INDEX (1 << 1) +#define IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2) +#define IP6_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3) +#define IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4) +/* Dynamic route created via neighbor discovery. */ +#define IP6_ROUTE_FLAG_NEIGHBOR (1 << 5) + +typedef struct { + /* IP6_ROUTE_FLAG_* */ + u32 flags; + + /* Either index of fib or table_id to hash and get fib. + IP6_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */ + u32 table_index_or_table_id; + + /* Destination address (prefix) and length. */ + ip6_address_t dst_address; + u32 dst_address_length; + + /* Adjacency to use for this destination. */ + u32 adj_index; + + /* If specified adjacencies to add and then + use for this destination. add_adj/n_add_adj + are override adj_index if specified. */ + ip_adjacency_t * add_adj; + u32 n_add_adj; +} ip6_add_del_route_args_t; + +void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * args); + +void ip6_add_del_route_next_hop (ip6_main_t * im, + u32 flags, + ip6_address_t * dst_address, + u32 dst_address_length, + ip6_address_t * next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_weight, u32 adj_index, + u32 explicit_fib_index); +u32 +ip6_get_route (ip6_main_t * im, + u32 fib_index_or_table_id, + u32 flags, + ip6_address_t * address, + u32 address_length); + +void +ip6_foreach_matching_route (ip6_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip6_address_t * address, + u32 address_length, + ip6_address_t ** results, + u8 ** result_length); + +void ip6_delete_matching_routes (ip6_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip6_address_t * address, + u32 address_length); + +void ip6_maybe_remap_adjacencies (ip6_main_t * im, + u32 table_index_or_table_id, + u32 flags); + +void ip6_adjacency_set_interface_route (vnet_main_t * vnm, + ip_adjacency_t * adj, + u32 sw_if_index, + u32 if_address_index); + +clib_error_t * +ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index); + +clib_error_t * +ip6_set_neighbor_limit (u32 neighbor_limit); + +uword +ip6_tcp_register_listener (vlib_main_t * vm, + u16 dst_port, + u32 next_node_index); +uword +ip6_udp_register_listener (vlib_main_t * vm, + u16 dst_port, + u32 next_node_index); + +u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp); + +void ip6_register_protocol (u32 protocol, u32 node_index); + +serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main; + +int +vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 * link_layer_address, + uword n_bytes_link_layer_address); +int +vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 * link_layer_address, + uword n_bytes_link_layer_address); +void +vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index); + +void +ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip, + u8 *mac); + +void +ip6_ethernet_mac_address_from_link_local_address (u8 *mac, + ip6_address_t *ip); + +int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config); + +int +ip6_neighbor_ra_config(vlib_main_t * vm, u32 sw_if_index, + u8 surpress, u8 managed, u8 other, + u8 ll_option, u8 send_unicast, u8 cease, + u8 use_lifetime, u32 lifetime, + u32 initial_count, u32 initial_interval, + u32 max_interval, u32 min_interval, + u8 
is_no); + +int +ip6_neighbor_ra_prefix(vlib_main_t * vm, u32 sw_if_index, + ip6_address_t *prefix_addr, u8 prefix_len, + u8 use_default, u32 val_lifetime, u32 pref_lifetime, + u8 no_advertise, u8 off_link, u8 no_autoconfig, u8 no_onlink, + u8 is_no); + + +clib_error_t * +enable_ip6_interface(vlib_main_t * vm, + u32 sw_if_index); + +clib_error_t * +disable_ip6_interface(vlib_main_t * vm, + u32 sw_if_index); + +int +ip6_interface_enabled(vlib_main_t * vm, + u32 sw_if_index); + +clib_error_t * +set_ip6_link_local_address(vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t *address, + u8 address_length); + +void vnet_register_ip6_neighbor_resolution_event(vnet_main_t * vnm, + void * address_arg, + uword node_index, + uword type_opaque, + uword data); + +int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 table_index); +extern vlib_node_registration_t ip6_lookup_node; + +/* Compute flow hash. We'll use it to select which Sponge to use for this + flow. And other things. */ +always_inline u32 +ip6_compute_flow_hash (ip6_header_t * ip, u32 flow_hash_config) +{ + tcp_header_t * tcp = (void *) (ip + 1); + u64 a, b, c; + u64 t1, t2; + uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP + || ip->protocol == IP_PROTOCOL_UDP); + + t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]); + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? t1 : 0; + + t2 = (ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1]); + t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) ? t2 : 0; + + a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1; + b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; + b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; + + t1 = is_tcp_udp ? tcp->ports.src : 0; + t2 = is_tcp_udp ? tcp->ports.dst : 0; + + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; + t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; + + c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? + ((t1<<16) | t2) : ((t2<<16) | t1); + + hash_mix64 (a, b, c); + return (u32) c; +} + +#endif /* included_ip_ip6_h */ diff --git a/vnet/vnet/ip/ip6_error.h b/vnet/vnet/ip/ip6_error.h new file mode 100644 index 00000000000..93754a10fcc --- /dev/null +++ b/vnet/vnet/ip/ip6_error.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip6_error.h: ip6 fast path errors + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_ip6_error_h +#define included_ip_ip6_error_h + +#define foreach_ip6_error \ + /* Must be first. */ \ + _ (NONE, "valid ip6 packets") \ + \ + /* Errors signalled by ip6-input */ \ + _ (TOO_SHORT, "ip6 length < 40 bytes") \ + _ (BAD_LENGTH, "ip6 length > l2 length") \ + _ (VERSION, "ip6 version != 6") \ + _ (TIME_EXPIRED, "ip6 ttl <= 1") \ + \ + /* Errors signalled by ip6-rewrite. */ \ + _ (MTU_EXCEEDED, "ip6 MTU exceeded") \ + _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \ + _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \ + _ (ADJACENCY_DROP, "ip6 adjacency drop") \ + _ (ADJACENCY_PUNT, "ip6 adjacency punt") \ + \ + /* Errors signalled by ip6-local. */ \ + _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \ + _ (UDP_CHECKSUM, "bad udp checksum") \ + _ (TCP_CHECKSUM, "bad tcp checksum") \ + _ (ICMP_CHECKSUM, "bad icmp checksum") \ + _ (UDP_LENGTH, "inconsistent udp/ip lengths") \ + \ + /* Errors signalled by {tcp6,udp6}-lookup. */ \ + _ (UNKNOWN_UDP_PORT, "no listener for udp port") \ + _ (UNKNOWN_TCP_PORT, "no listener for tcp port") \ + \ + /* Spoofed packets in ip6-rewrite-local */ \ + _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \ + \ + /* Erros singalled by ip6-inacl */ \ + _ (INACL_TABLE_MISS, "input ACL table-miss drops") \ + _ (INACL_SESSION_DENY, "input ACL session deny drops") + +typedef enum { +#define _(sym,str) IP6_ERROR_##sym, + foreach_ip6_error +#undef _ + IP6_N_ERROR, +} ip6_error_t; + +#endif /* included_ip_ip6_error_h */ diff --git a/vnet/vnet/ip/ip6_format.c b/vnet/vnet/ip/ip6_format.c new file mode 100644 index 00000000000..1a2810e16ec --- /dev/null +++ b/vnet/vnet/ip/ip6_format.c @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip6_format.c: ip6 formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +/* Format an IP6 address. */ +u8 * format_ip6_address (u8 * s, va_list * args) +{ + ip6_address_t * a = va_arg (*args, ip6_address_t *); + u32 max_zero_run = 0, this_zero_run = 0; + int max_zero_run_index = -1, this_zero_run_index=0; + int in_zero_run = 0, i; + int last_double_colon = 0; + + /* Ugh, this is a pain. Scan forward looking for runs of 0's */ + for (i = 0; i < ARRAY_LEN (a->as_u16); i++) + { + if (a->as_u16[i] == 0) + { + if (in_zero_run) + this_zero_run++; + else + { + in_zero_run = 1; + this_zero_run =1; + this_zero_run_index = i; + } + } + else + { + if (in_zero_run) + { + /* offer to compress the biggest run of > 1 zero */ + if (this_zero_run > max_zero_run && this_zero_run > 1) + { + max_zero_run_index = this_zero_run_index; + max_zero_run = this_zero_run; + } + } + in_zero_run = 0; + this_zero_run = 0; + } + } + + if (in_zero_run) + { + if (this_zero_run > max_zero_run && this_zero_run > 1) + { + max_zero_run_index = this_zero_run_index; + max_zero_run = this_zero_run; + } + } + + for (i = 0; i < ARRAY_LEN (a->as_u16); i++) + { + if (i == max_zero_run_index) + { + s = format (s, "::"); + i += max_zero_run - 1; + last_double_colon = 1; + } + else + { + s = format (s, "%s%x", + (last_double_colon || i == 0) ? "" : ":", + clib_net_to_host_u16 (a->as_u16[i])); + last_double_colon = 0; + } + } + + return s; +} + +/* Format an IP6 route destination and length. */ +u8 * format_ip6_address_and_length (u8 * s, va_list * args) +{ + ip6_address_t * a = va_arg (*args, ip6_address_t *); + u8 l = va_arg (*args, u32); + return format (s, "%U/%d", format_ip6_address, a, l); +} + +/* Parse an IP6 address. 
*/ +uword unformat_ip6_address (unformat_input_t * input, va_list * args) +{ + ip6_address_t * result = va_arg (*args, ip6_address_t *); + u16 hex_quads[8]; + uword hex_quad, n_hex_quads, hex_digit, n_hex_digits; + uword c, n_colon, double_colon_index; + + n_hex_quads = hex_quad = n_hex_digits = n_colon = 0; + double_colon_index = ARRAY_LEN (hex_quads); + while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT) + { + hex_digit = 16; + if (c >= '0' && c <= '9') + hex_digit = c - '0'; + else if (c >= 'a' && c <= 'f') + hex_digit = c + 10 - 'a'; + else if (c >= 'A' && c <= 'F') + hex_digit = c + 10 - 'A'; + else if (c == ':' && n_colon < 2) + n_colon++; + else + { + unformat_put_input (input); + break; + } + + /* Too many hex quads. */ + if (n_hex_quads >= ARRAY_LEN (hex_quads)) + return 0; + + if (hex_digit < 16) + { + hex_quad = (hex_quad << 4) | hex_digit; + + /* Hex quad must fit in 16 bits. */ + if (n_hex_digits >= 4) + return 0; + + n_colon = 0; + n_hex_digits++; + } + + /* Save position of :: */ + if (n_colon == 2) + { + /* More than one :: ? */ + if (double_colon_index < ARRAY_LEN (hex_quads)) + return 0; + double_colon_index = n_hex_quads; + } + + if (n_colon > 0 && n_hex_digits > 0) + { + hex_quads[n_hex_quads++] = hex_quad; + hex_quad = 0; + n_hex_digits = 0; + } + } + + if (n_hex_digits > 0) + hex_quads[n_hex_quads++] = hex_quad; + + { + word i; + + /* Expand :: to appropriate number of zero hex quads. */ + if (double_colon_index < ARRAY_LEN (hex_quads)) + { + word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads; + + for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--) + hex_quads[n_zero + i] = hex_quads[i]; + + for (i = 0; i < n_zero; i++) + { + ASSERT ((double_colon_index + i) < ARRAY_LEN (hex_quads)); + hex_quads[double_colon_index + i] = 0; + } + + n_hex_quads = ARRAY_LEN (hex_quads); + } + + /* Too few hex quads given. */ + if (n_hex_quads < ARRAY_LEN (hex_quads)) + return 0; + + for (i = 0; i < ARRAY_LEN (hex_quads); i++) + result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]); + + return 1; + } +} + +/* Format an IP6 header. */ +u8 * format_ip6_header (u8 * s, va_list * args) +{ + ip6_header_t * ip = va_arg (*args, ip6_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 i, ip_version, traffic_class, flow_label; + uword indent; + + /* Nothing to do. */ + if (max_header_bytes < sizeof (ip[0])) + return format (s, "IP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "%U: %U -> %U", + format_ip_protocol, ip->protocol, + format_ip6_address, &ip->src_address, + format_ip6_address, &ip->dst_address); + + i = clib_net_to_host_u32 (ip->ip_version_traffic_class_and_flow_label); + ip_version = (i >> 28); + traffic_class = (i >> 20) & 0xff; + flow_label = i & pow2_mask (20); + + if (ip_version != 6) + s = format (s, "\n%Uversion %d", + format_white_space, indent, ip_version); + + s = format (s, "\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d", + format_white_space, indent, + traffic_class, flow_label, ip->hop_limit, + clib_net_to_host_u16 (ip->payload_length)); + + /* Recurse into next protocol layer. 
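+ *
+ * The block below hands the bytes after the fixed header to the formatter
+ * registered for ip->protocol, so e.g. a TCP payload is printed by the TCP
+ * formatter.  Callers normally reach this function through format's %U
+ * directive, passing the header and the bytes available (sketch):
+ *
+ *   s = format (s, "%U", format_ip6_header, ip6, n_bytes_available);
+ *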
*/ + if (max_header_bytes != 0 && sizeof (ip[0]) < max_header_bytes) + { + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi = ip_get_protocol_info (im, ip->protocol); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, + pi->format_header, + /* next protocol header */ (void*) (ip + 1), + max_header_bytes - sizeof (ip[0])); + } + + return s; +} + +/* Parse an IP6 header. */ +uword unformat_ip6_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + ip6_header_t * ip; + int old_length; + + /* Allocate space for IP header. */ + { + void * p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip[0])); + ip = p; + } + + memset (ip, 0, sizeof (ip[0])); + ip->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (6 << 28); + + if (! unformat (input, "%U: %U -> %U", + unformat_ip_protocol, &ip->protocol, + unformat_ip6_address, &ip->src_address, + unformat_ip6_address, &ip->dst_address)) + return 0; + + /* Parse options. */ + while (1) + { + int i; + + if (unformat (input, "tos %U", unformat_vlib_number, &i)) + ip->ip_version_traffic_class_and_flow_label |= clib_host_to_net_u32 ((i & 0xff) << 20); + + else if (unformat (input, "hop-limit %U", unformat_vlib_number, &i)) + ip->hop_limit = i; + + /* Can't parse input: try next protocol level. */ + else + break; + } + + /* Recurse into next protocol layer. */ + { + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi = ip_get_protocol_info (im, ip->protocol); + + if (pi && pi->unformat_header) + { + if (! unformat_user (input, pi->unformat_header, result)) + return 0; + + /* Result may have moved. */ + ip = (void *) *result + old_length; + } + } + + ip->payload_length = clib_host_to_net_u16 (vec_len (*result) - (old_length + sizeof (ip[0]))); + + return 1; +} diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c new file mode 100644 index 00000000000..f0065e969f8 --- /dev/null +++ b/vnet/vnet/ip/ip6_forward.c @@ -0,0 +1,2724 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip6_forward.c: IP v6 forwarding + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ +#include <vnet/srp/srp.h> /* for srp_hw_interface_class */ +#include <vppinfra/cache.h> + +#include <vppinfra/bihash_template.c> + +static void compute_prefix_lengths_in_search_order (ip6_main_t * im) +{ + int i; + vec_reset_length (im->prefix_lengths_in_search_order); + /* Note: bitmap reversed so this is in fact a longest prefix match */ + clib_bitmap_foreach (i, im->non_empty_dst_address_length_bitmap, + ({ + int dst_address_length = 128 - i; + vec_add1 (im->prefix_lengths_in_search_order, dst_address_length); + })); +} + +u32 +ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, ip6_address_t * dst) +{ + ip_lookup_main_t * lm = &im->lookup_main; + int i, len; + int rv; + BVT(clib_bihash_kv) kv, value; + + len = vec_len (im->prefix_lengths_in_search_order); + + for (i = 0; i < len; i++) + { + int dst_address_length = im->prefix_lengths_in_search_order[i]; + ip6_address_t * mask = &im->fib_masks[dst_address_length]; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + + kv.key[0] = dst->as_u64[0] & mask->as_u64[0]; + kv.key[1] = dst->as_u64[1] & mask->as_u64[1]; + kv.key[2] = ((u64)((fib_index))<<32) | dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value); + if (rv == 0) + return value.value; + } + + return lm->miss_adj_index; +} + +u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst) +{ + u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); + return ip6_fib_lookup_with_table (im, fib_index, dst); +} + +void +vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip6_add_del_route_args_t a; + ip_adjacency_t * adj; + + memset(&a, 0x0, sizeof(ip6_add_del_route_args_t)); + + a.table_index_or_table_id = fib_index; + a.flags = (IP6_ROUTE_FLAG_ADD + | IP6_ROUTE_FLAG_FIB_INDEX + | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY + | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); + + /* Add ff02::1:ff00:0/104 via local route for all tables. + This is required for neighbor discovery to work. 
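+ *
+ * ff02::1:ff00:0/104 covers every solicited-node multicast group: a
+ * solicited-node address is ff02::1:ffXX:XXXX, where the final 24 bits are
+ * copied from the unicast/anycast address being resolved, so one /104 local
+ * route catches all of them.  Worked example: neighbor solicitation for
+ * 2001:db8::abcd:1234 goes to ff02::1:ffcd:1234.  A sketch of building the
+ * group for a given address (assumes the usual as_u8 byte view of
+ * ip6_address_t, which is not shown in this header):
+ *
+ *   ip6_address_t sn;
+ *   ip6_set_solicited_node_multicast_address (&sn, 0);  // ff02::1:ff00:0
+ *   sn.as_u8[13] = addr->as_u8[13];
+ *   sn.as_u8[14] = addr->as_u8[14];
+ *   sn.as_u8[15] = addr->as_u8[15];
+ *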
*/ + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &a.adj_index); + adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + adj->if_address_index = ~0; + adj->rewrite_header.data_bytes = 0; + + ip6_set_solicited_node_multicast_address (&a.dst_address, 0); + + a.dst_address_length = 104; + ip6_add_del_route (im, &a); + + /* Add all-routers multicast address via local route for all tables */ + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &a.adj_index); + adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + adj->if_address_index = ~0; + adj->rewrite_header.data_bytes = 0; + + ip6_set_reserved_multicast_address (&a.dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_routers); + + a.dst_address_length = 128; + ip6_add_del_route (im, &a); + + /* Add all-nodes multicast address via local route for all tables */ + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &a.adj_index); + adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + adj->if_address_index = ~0; + adj->rewrite_header.data_bytes = 0; + + ip6_set_reserved_multicast_address (&a.dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + + a.dst_address_length = 128; + ip6_add_del_route (im, &a); + + /* Add all-mldv2 multicast address via local route for all tables */ + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &a.adj_index); + adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + adj->if_address_index = ~0; + adj->rewrite_header.data_bytes = 0; + + ip6_set_reserved_multicast_address (&a.dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + + a.dst_address_length = 128; + ip6_add_del_route (im, &a); +} + +static ip6_fib_t * +create_fib_with_table_id (ip6_main_t * im, u32 table_id) +{ + ip6_fib_t * fib; + hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs)); + vec_add2 (im->fibs, fib, 1); + fib->table_id = table_id; + fib->index = fib - im->fibs; + fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; + vnet_ip6_fib_init (im, fib->index); + return fib; +} + +ip6_fib_t * +find_ip6_fib_by_table_index_or_id (ip6_main_t * im, u32 table_index_or_id, u32 flags) +{ + uword * p, fib_index; + + fib_index = table_index_or_id; + if (! (flags & IP6_ROUTE_FLAG_FIB_INDEX)) + { + p = hash_get (im->fib_index_by_table_id, table_index_or_id); + if (! p) + return create_fib_with_table_id (im, table_index_or_id); + fib_index = p[0]; + } + return vec_elt_at_index (im->fibs, fib_index); +} + +void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip6_fib_t * fib; + ip6_address_t dst_address; + u32 dst_address_length, adj_index; + uword is_del; + u32 old_adj_index = ~0; + BVT(clib_bihash_kv) kv, value; + + vlib_smp_unsafe_warning(); + + is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0; + + /* Either create new adjacency or use given one depending on arguments. 
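+ *
+ * Once the adjacency is settled, the route itself lives in the 24_8 bihash:
+ * the masked destination occupies key[0..1] and key[2] packs the fib index
+ * together with the prefix length, as the code further down builds it
+ * (sketch):
+ *
+ *   kv.key[0] = dst_address.as_u64[0];   // already masked to the prefix
+ *   kv.key[1] = dst_address.as_u64[1];
+ *   kv.key[2] = ((u64) fib_index << 32) | dst_address_length;
+ *   kv.value  = adj_index;
+ *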
*/ + if (a->n_add_adj > 0) + { + ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); + ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); + } + else + adj_index = a->adj_index; + + dst_address = a->dst_address; + dst_address_length = a->dst_address_length; + fib = find_ip6_fib_by_table_index_or_id (im, a->table_index_or_table_id, + a->flags); + + ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); + ip6_address_mask (&dst_address, &im->fib_masks[dst_address_length]); + + /* refcount accounting */ + if (is_del) + { + ASSERT (im->dst_address_length_refcounts[dst_address_length] > 0); + if (--im->dst_address_length_refcounts[dst_address_length] == 0) + { + im->non_empty_dst_address_length_bitmap = + clib_bitmap_set (im->non_empty_dst_address_length_bitmap, + 128 - dst_address_length, 0); + compute_prefix_lengths_in_search_order (im); + } + } + else + { + im->dst_address_length_refcounts[dst_address_length]++; + + im->non_empty_dst_address_length_bitmap = + clib_bitmap_set (im->non_empty_dst_address_length_bitmap, + 128 - dst_address_length, 1); + compute_prefix_lengths_in_search_order (im); + } + + kv.key[0] = dst_address.as_u64[0]; + kv.key[1] = dst_address.as_u64[1]; + kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length; + + if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0) + old_adj_index = value.value; + + if (is_del) + BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 0 /* is_add */); + else + { + /* Make sure adj index is valid. */ + if (CLIB_DEBUG > 0) + (void) ip_get_adjacency (lm, adj_index); + + kv.value = adj_index; + + BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */); + } + + /* Delete old adjacency index if present and changed. */ + { + if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY) + && old_adj_index != ~0 + && old_adj_index != adj_index) + ip_del_adjacency (lm, old_adj_index); + } +} + +void +ip6_add_del_route_next_hop (ip6_main_t * im, + u32 flags, + ip6_address_t * dst_address, + u32 dst_address_length, + ip6_address_t * next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_weight, u32 adj_index, + u32 explicit_fib_index) +{ + vnet_main_t * vnm = vnet_get_main(); + ip_lookup_main_t * lm = &im->lookup_main; + u32 fib_index; + ip6_fib_t * fib; + ip6_address_t masked_dst_address; + u32 old_mp_adj_index, new_mp_adj_index; + u32 dst_adj_index, nh_adj_index; + int rv; + ip_adjacency_t * dst_adj; + ip_multipath_adjacency_t * old_mp, * new_mp; + int is_del = (flags & IP6_ROUTE_FLAG_DEL) != 0; + int is_interface_next_hop; + clib_error_t * error = 0; + uword * nh_result; + BVT(clib_bihash_kv) kv, value; + + vlib_smp_unsafe_warning(); + + if (explicit_fib_index == (u32)~0) + fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index); + else + fib_index = explicit_fib_index; + + fib = vec_elt_at_index (im->fibs, fib_index); + + /* Lookup next hop to be added or deleted. 
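+ *
+ * An all-zero next hop means "route via the interface itself" and resolves
+ * to the per-interface adjacency; any other next hop must already be present
+ * as a /128 (e.g. an ip6 neighbor) whose adjacency is then reused, possibly
+ * as one leg of a multipath adjacency.  A hedged usage sketch, adding a
+ * default route via a neighbor nh on sw_if_index (both illustrative and
+ * assumed to exist already):
+ *
+ *   ip6_address_t pfx, nh;
+ *   memset (&pfx, 0, sizeof (pfx));                  // ::/0
+ *   ip6_add_del_route_next_hop (&ip6_main, IP6_ROUTE_FLAG_ADD,
+ *                               &pfx, 0,             // dst, dst length
+ *                               &nh, sw_if_index,    // next hop, interface
+ *                               1, ~0, ~0);          // weight, adj (resolve),
+ *                                                    // fib (from interface)
+ *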
*/ + is_interface_next_hop = ip6_address_is_zero (next_hop); + if (adj_index == (u32)~0) + { + if (is_interface_next_hop) + { + nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, + next_hop_sw_if_index); + if (nh_result) + nh_adj_index = *nh_result; + else + { + ip_adjacency_t * adj; + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &nh_adj_index); + ip6_adjacency_set_interface_route (vnm, adj, + next_hop_sw_if_index, ~0); + ip_call_add_del_adjacency_callbacks + (lm, next_hop_sw_if_index, /* is_del */ 0); + hash_set (im->interface_route_adj_index_by_sw_if_index, + next_hop_sw_if_index, nh_adj_index); + } + } + else + { + /* Look for the interface /128 route */ + kv.key[0] = next_hop->as_u64[0]; + kv.key[1] = next_hop->as_u64[1]; + kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128; + + if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0) + { + vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; + error = clib_error_return (0, "next-hop %U/128 not in FIB", + format_ip6_address, next_hop); + goto done; + } + + nh_adj_index = value.value; + } + } + else + { + /* Look for the interface /128 route */ + kv.key[0] = next_hop->as_u64[0]; + kv.key[1] = next_hop->as_u64[1]; + kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128; + + if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0) + { + vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; + error = clib_error_return (0, "next-hop %U/128 not in FIB", + format_ip6_address, next_hop); + goto done; + } + + nh_adj_index = value.value; + } + + ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); + masked_dst_address = dst_address[0]; + ip6_address_mask (&masked_dst_address, &im->fib_masks[dst_address_length]); + + kv.key[0] = masked_dst_address.as_u64[0]; + kv.key[1] = masked_dst_address.as_u64[1]; + kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length; + + rv = BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value); + + if (rv == 0) + { + dst_adj_index = value.value; + dst_adj = ip_get_adjacency (lm, dst_adj_index); + } + else + { + /* For deletes destination must be known. */ + if (is_del) + { + vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; + error = clib_error_return (0, "unknown destination %U/%d", + format_ip6_address, dst_address, + dst_address_length); + goto done; + } + + dst_adj_index = ~0; + dst_adj = 0; + } + + /* Ignore adds of X/128 with next hop of X. */ + if (! is_del + && dst_address_length == 128 + && ip6_address_is_equal (dst_address, next_hop)) + { + vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP; + error = clib_error_return (0, "prefix matches next hop %U/%d", + format_ip6_address, dst_address, + dst_address_length); + goto done; + } + + old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0; + + if (! ip_multipath_adjacency_add_del_next_hop + (lm, is_del, + dst_adj ? dst_adj->heap_handle : ~0, + nh_adj_index, + next_hop_weight, + &new_mp_adj_index)) + { + vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP; + error = clib_error_return + (0, "requested deleting next-hop %U not found in multi-path", + format_ip6_address, next_hop); + goto done; + } + + old_mp = new_mp = 0; + if (old_mp_adj_index != ~0) + old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); + if (new_mp_adj_index != ~0) + new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index); + + if (old_mp != new_mp) + { + ip6_add_del_route_args_t a; + a.table_index_or_table_id = fib_index; + a.flags = ((is_del ? 
IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD) + | IP6_ROUTE_FLAG_FIB_INDEX + | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY + | (flags & IP6_ROUTE_FLAG_NO_REDISTRIBUTE)); + a.dst_address = dst_address[0]; + a.dst_address_length = dst_address_length; + a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index; + a.add_adj = 0; + a.n_add_adj = 0; + + ip6_add_del_route (im, &a); + } + + done: + if (error) + clib_error_report (error); +} + +u32 +ip6_get_route (ip6_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip6_address_t * address, + u32 address_length) +{ + ip6_fib_t * fib = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); + ip6_address_t masked_address; + BVT(clib_bihash_kv) kv, value; + + ASSERT (address_length < ARRAY_LEN (im->fib_masks)); + memcpy (&masked_address, address, sizeof (masked_address)); + ip6_address_mask (&masked_address, &im->fib_masks[address_length]); + + kv.key[0] = masked_address.as_u64[0]; + kv.key[1] = masked_address.as_u64[1]; + kv.key[2] = ((u64)((fib - im->fibs))<<32) | address_length; + + if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0) + return (value.value); + return 0; +} + +void +ip6_foreach_matching_route (ip6_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip6_address_t * dst_address, + u32 address_length, + ip6_address_t ** results, + u8 ** result_lengths) +{ + ip6_fib_t * fib = + find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); + BVT(clib_bihash) * h = &im->ip6_lookup_table; + BVT(clib_bihash_value) * v; + clib_bihash_bucket_t * b; + int i, j, k; + + if (*results) + _vec_len (*results) = 0; + if (*result_lengths) + _vec_len (*result_lengths) = 0; + + /* Walk the table looking for routes which match the supplied address */ + for (i = 0; i < h->nbuckets; i++) + { + b = &h->buckets [i]; + if (b->offset == 0) + continue; + + v = BV(clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (BV(clib_bihash_is_free)(&v->kvp[k])) + continue; + + if ((v->kvp[k].key[2] + == (((u64)((fib - im->fibs))<<32) | address_length)) + && ip6_destination_matches_route + (im, dst_address, (ip6_address_t *) &v->kvp[k], + address_length)) + { + ip6_address_t * a; + + a = (ip6_address_t *)(&v->kvp[k]); + + vec_add1 (*results, a[0]); + vec_add1 (*result_lengths, address_length); + } + } + v++; + } + } +} + +void ip6_maybe_remap_adjacencies (ip6_main_t * im, + u32 table_index_or_table_id, + u32 flags) +{ +#if SOONE + ip6_fib_t * fib + = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); +#endif + ip_lookup_main_t * lm = &im->lookup_main; + + if (lm->n_adjacency_remaps == 0) + return; + + clib_warning ("unimplemented, please report to vpp-dev@cisco.com"); + + /* All remaps have been performed. 
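The walk that would rewrite remapped adjacency indices in the bihash is not implemented
for the ip6 FIB (hence the warning above); the pending-remap counter is simply cleared
so callers do not keep re-entering this path.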
*/ + lm->n_adjacency_remaps = 0; +} + +void ip6_delete_matching_routes (ip6_main_t * im, + u32 table_index_or_table_id, + u32 flags, + ip6_address_t * address, + u32 address_length) +{ + /* $$$$ static may be OK - this should happen only on thread 0 */ + static ip6_address_t * matching_addresses; + static u8 * matching_address_lengths; + u32 l, i; + ip6_add_del_route_args_t a; + + vlib_smp_unsafe_warning(); + + a.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_NO_REDISTRIBUTE | flags; + a.table_index_or_table_id = table_index_or_table_id; + a.adj_index = ~0; + a.add_adj = 0; + a.n_add_adj = 0; + + for (l = address_length + 1; l <= 128; l++) + { + ip6_foreach_matching_route (im, table_index_or_table_id, flags, + address, + l, + &matching_addresses, + &matching_address_lengths); + for (i = 0; i < vec_len (matching_addresses); i++) + { + a.dst_address = matching_addresses[i]; + a.dst_address_length = matching_address_lengths[i]; + ip6_add_del_route (im, &a); + } + } + + ip6_maybe_remap_adjacencies (im, table_index_or_table_id, flags); +} + +static uword +ip6_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + u32 pi0, pi1, adj_index0, adj_index1, wrong_next; + ip_lookup_next_t next0, next1; + ip6_header_t * ip0, * ip1; + ip_adjacency_t * adj0, * adj1; + u32 fib_index0, fib_index1; + u32 flow_hash_config0, flow_hash_config1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); + + fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? + fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; + fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? 
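/* ~0 in sw_if_index[VLIB_TX] means no override; any other value is taken as an
   explicit FIB index (presumably stored there by an earlier feature) and replaces
   the table derived from the RX interface. */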
+ fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; + + adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, + &ip0->dst_address); + adj_index1 = ip6_fib_lookup_with_table (im, fib_index1, + &ip1->dst_address); + + adj0 = ip_get_adjacency (lm, adj_index0); + adj1 = ip_get_adjacency (lm, adj_index1); + + if (PREDICT_FALSE (adj0->explicit_fib_index != ~0)) + { + adj_index0 = ip6_fib_lookup_with_table + (im, adj0->explicit_fib_index, &ip0->dst_address); + adj0 = ip_get_adjacency (lm, adj_index0); + } + if (PREDICT_FALSE (adj1->explicit_fib_index != ~0)) + { + adj_index1 = ip6_fib_lookup_with_table + (im, adj1->explicit_fib_index, &ip1->dst_address); + adj1 = ip_get_adjacency (lm, adj_index1); + } + + next0 = adj0->lookup_next_index; + next1 = adj1->lookup_next_index; + + /* Process hop-by-hop options if present */ + next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ? + IP_LOOKUP_NEXT_HOP_BY_HOP : next0; + next1 = (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ? + IP_LOOKUP_NEXT_HOP_BY_HOP : next1; + + vnet_buffer (p0)->ip.flow_hash = + vnet_buffer(p1)->ip.flow_hash = 0; + + if (PREDICT_FALSE(adj0->n_adj > 1)) + { + flow_hash_config0 = + vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + vnet_buffer (p0)->ip.flow_hash = + ip6_compute_flow_hash (ip0, flow_hash_config0); + } + + if (PREDICT_FALSE(adj1->n_adj > 1)) + { + flow_hash_config1 = + vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + + vnet_buffer (p1)->ip.flow_hash = + ip6_compute_flow_hash (ip1, flow_hash_config1); + } + + ASSERT (adj0->n_adj > 0); + ASSERT (adj1->n_adj > 0); + ASSERT (is_pow2 (adj0->n_adj)); + ASSERT (is_pow2 (adj1->n_adj)); + adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1)); + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + + vlib_increment_combined_counter + (cm, cpu_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0)); + vlib_increment_combined_counter + (cm, cpu_index, adj_index1, 1, + vlib_buffer_length_in_chain (vm, p1)); + + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + wrong_next = (next0 != next) + 2*(next1 != next); + if (PREDICT_FALSE (wrong_next != 0)) + { + switch (wrong_next) + { + case 1: + /* A B A */ + to_next[-2] = pi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, pi1); + break; + + case 3: + /* A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + vlib_set_next_frame_buffer (vm, node, next1, pi1); + if (next0 == next1) + { + /* A B B */ + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next1; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + u32 pi0, adj_index0; + ip_lookup_next_t next0; + ip_adjacency_t * adj0; + u32 fib_index0, flow_hash_config0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? 
+ fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; + + flow_hash_config0 = + vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + + adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, + &ip0->dst_address); + + adj0 = ip_get_adjacency (lm, adj_index0); + + if (PREDICT_FALSE (adj0->explicit_fib_index != ~0)) + { + adj_index0 = ip6_fib_lookup_with_table + (im, adj0->explicit_fib_index, &ip0->dst_address); + adj0 = ip_get_adjacency (lm, adj_index0); + } + + next0 = adj0->lookup_next_index; + next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ? + IP_LOOKUP_NEXT_HOP_BY_HOP : next0; + + vnet_buffer (p0)->ip.flow_hash = 0; + + if (PREDICT_FALSE(adj0->n_adj > 1)) + { + flow_hash_config0 = + vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + vnet_buffer (p0)->ip.flow_hash = + ip6_compute_flow_hash (ip0, flow_hash_config0); + } + + ASSERT (adj0->n_adj > 0); + ASSERT (is_pow2 (adj0->n_adj)); + adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + + vlib_increment_combined_counter + (cm, cpu_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +void ip6_adjacency_set_interface_route (vnet_main_t * vnm, + ip_adjacency_t * adj, + u32 sw_if_index, + u32 if_address_index) +{ + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + ip_lookup_next_t n; + u32 node_index; + + if (hw->hw_class_index == ethernet_hw_interface_class.index + || hw->hw_class_index == srp_hw_interface_class.index) + { + n = IP_LOOKUP_NEXT_ARP; + node_index = ip6_discover_neighbor_node.index; + adj->if_address_index = if_address_index; + } + else + { + n = IP_LOOKUP_NEXT_REWRITE; + node_index = ip6_rewrite_node.index; + } + + adj->lookup_next_index = n; + adj->explicit_fib_index = ~0; + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_IP6, + sw_if_index, + node_index, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); +} + +static void +ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, + ip6_main_t * im, u32 fib_index, + ip_interface_address_t * a) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_adjacency_t * adj; + ip6_address_t * address = ip_interface_address_get_address (lm, a); + ip6_add_del_route_args_t x; + vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index); + u32 classify_table_index; + + /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
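(The example prefix appears to be carried over from the IPv4 code; for IPv6 read e.g.
2001:db8::/64.) The covering prefix gets a neighbor-discovery adjacency so on-link
destinations are resolved via ip6-discover-neighbor, and further below the interface
address itself is installed as a /128 receive (local or classify) route.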
*/ + x.table_index_or_table_id = fib_index; + x.flags = (IP6_ROUTE_FLAG_ADD + | IP6_ROUTE_FLAG_FIB_INDEX + | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); + x.dst_address = address[0]; + x.dst_address_length = a->address_length; + x.n_add_adj = 0; + x.add_adj = 0; + + a->neighbor_probe_adj_index = ~0; + if (a->address_length < 128) + { + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &x.adj_index); + ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool); + ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); + ip6_add_del_route (im, &x); + a->neighbor_probe_adj_index = x.adj_index; + } + + /* Add e.g. ::1/128 as local to this host. */ + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &x.adj_index); + + classify_table_index = ~0; + if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) + classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index]; + if (classify_table_index != (u32) ~0) + { + adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; + adj->classify_table_index = classify_table_index; + } + else + adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + + adj->if_address_index = a - lm->if_address_pool; + adj->rewrite_header.sw_if_index = sw_if_index; + adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX]; + adj->rewrite_header.data_bytes = 0; + ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); + x.dst_address_length = 128; + ip6_add_del_route (im, &x); +} + +static void +ip6_del_interface_routes (ip6_main_t * im, u32 fib_index, + ip6_address_t * address, u32 address_length) +{ + ip6_add_del_route_args_t x; + + /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */ + x.table_index_or_table_id = fib_index; + x.flags = (IP6_ROUTE_FLAG_DEL + | IP6_ROUTE_FLAG_FIB_INDEX + | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); + x.dst_address = address[0]; + x.dst_address_length = address_length; + x.adj_index = ~0; + x.n_add_adj = 0; + x.add_adj = 0; + + if (address_length < 128) + { + /* Don't wipe out fe80::0/64 */ + if (address_length != 64 || + address[0].as_u64[0] != clib_net_to_host_u64(0xfe80000000000000ULL)) + ip6_add_del_route (im, &x); + } + + x.dst_address_length = 128; + ip6_add_del_route (im, &x); + + ip6_delete_matching_routes (im, + fib_index, + IP6_ROUTE_FLAG_FIB_INDEX, + address, + address_length); +} + +typedef struct { + u32 sw_if_index; + ip6_address_t address; + u32 length; +} ip6_interface_address_t; + +static clib_error_t * +ip6_add_del_interface_address_internal (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * new_address, + u32 new_length, + u32 redistribute, + u32 insert_routes, + u32 is_del); + +static clib_error_t * +ip6_add_del_interface_address_internal (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 redistribute, + u32 insert_routes, + u32 is_del) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + clib_error_t * error; + u32 if_address_index; + ip6_address_fib_t ip6_af, * addr_fib = 0; + + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + ip6_addr_fib_init (&ip6_af, address, + vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); + vec_add1 (addr_fib, ip6_af); + + { + uword elts_before = pool_elts (lm->if_address_pool); + + error = ip_interface_address_add_del + (lm, + sw_if_index, + addr_fib, + address_length, + is_del, + &if_address_index); + if (error) + goto done; + + /* Pool did not grow: add 
duplicate address. */ + if (elts_before == pool_elts (lm->if_address_pool)) + goto done; + } + + if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes) + { + if (is_del) + ip6_del_interface_routes (im, ip6_af.fib_index, address, + address_length); + + else + ip6_add_interface_routes (vnm, sw_if_index, + im, ip6_af.fib_index, + pool_elt_at_index (lm->if_address_pool, if_address_index)); + } + + { + ip6_add_del_interface_address_callback_t * cb; + vec_foreach (cb, im->add_del_interface_address_callbacks) + cb->function (im, cb->function_opaque, sw_if_index, + address, address_length, + if_address_index, + is_del); + } + + done: + vec_free (addr_fib); + return error; +} + +clib_error_t * +ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, + ip6_address_t * address, u32 address_length, + u32 is_del) +{ + return ip6_add_del_interface_address_internal + (vm, sw_if_index, address, address_length, + /* redistribute */ 1, + /* insert_routes */ 1, + is_del); +} + +clib_error_t * +ip6_sw_interface_admin_up_down (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + ip6_main_t * im = &ip6_main; + ip_interface_address_t * ia; + ip6_address_t * a; + u32 is_admin_up, fib_index; + + /* Fill in lookup tables with default table (0). */ + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + + vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0); + + is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); + + foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + a = ip_interface_address_get_address (&im->lookup_main, ia); + if (is_admin_up) + ip6_add_interface_routes (vnm, sw_if_index, + im, fib_index, + ia); + else + ip6_del_interface_routes (im, fib_index, + a, ia->address_length); + })); + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down); + +clib_error_t * +ip6_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + vlib_main_t * vm = vnm->vlib_main; + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 ci, cast; + + for (cast = 0; cast < VNET_N_CAST; cast++) + { + ip_config_main_t * cm = &lm->rx_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; + + /* FIXME multicast. */ + if (! 
vcm->node_index_by_feature_index) + { + char * start_nodes[] = { "ip6-input", }; + char * feature_nodes[] = { + [IP6_RX_FEATURE_CHECK_ACCESS] = "ip6-inacl", + [IP6_RX_FEATURE_IPSEC] = "ipsec-input-ip6", + [IP6_RX_FEATURE_L2TPV3] = "l2tp-decap", + [IP6_RX_FEATURE_VPATH] = "vpath-input-ip6", + [IP6_RX_FEATURE_LOOKUP] = "ip6-lookup", + }; + vnet_config_init (vm, vcm, + start_nodes, ARRAY_LEN (start_nodes), + feature_nodes, ARRAY_LEN (feature_nodes)); + } + + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + if (is_add) + ci = vnet_config_add_feature (vm, vcm, + ci, + IP6_RX_FEATURE_LOOKUP, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, + ci, + IP6_RX_FEATURE_LOOKUP, + /* config data */ 0, + /* # bytes of config data */ 0); + + cm->config_index_by_sw_if_index[sw_if_index] = ci; + } + return /* no error */ 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del); + +VLIB_REGISTER_NODE (ip6_lookup_node) = { + .function = ip6_lookup, + .name = "ip6-lookup", + .vector_size = sizeof (u32), + + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip6-miss", + [IP_LOOKUP_NEXT_DROP] = "ip6-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", + [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", + [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", + [IP_LOOKUP_NEXT_MAP] = "ip6-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd", + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", + }, +}; + +typedef struct { + /* Adjacency taken. */ + u32 adj_index; + u32 flow_hash; + + /* Packet data, possibly *after* rewrite. */ + u8 packet_data[64 - 1*sizeof(u32)]; +} ip6_forward_next_trace_t; + +static u8 * format_ip6_forward_next_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *); + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + ip_adjacency_t * adj; + uword indent = format_get_indent (s); + + adj = ip_get_adjacency (&im->lookup_main, t->adj_index); + s = format (s, "adjacency: %U flow hash: 0x%08x", + format_ip_adjacency, + vnm, &im->lookup_main, t->adj_index, t->flow_hash); + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_REWRITE: + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + vnm, &im->lookup_main, t->adj_index, + t->packet_data, sizeof (t->packet_data)); + break; + + default: + break; + } + + return s; +} + +/* Common trace function for all ip6-forward next nodes. */ +void +ip6_forward_next_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vlib_rx_or_tx_t which_adj_index) +{ + u32 * from, n_left; + + n_left = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + while (n_left >= 4) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ip6_forward_next_trace_t * t0, * t1; + + /* Prefetch next iteration. 
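Buffers two slots ahead are brought into cache so the header reads and trace copies
below overlap the memory latency, mirroring the prefetch pattern used in ip6_lookup.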
*/ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; + t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; + memcpy (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index]; + t1->flow_hash = vnet_buffer (b1)->ip.flow_hash; + memcpy (t1->packet_data, + vlib_buffer_get_current (b1), + sizeof (t1->packet_data)); + } + from += 2; + n_left -= 2; + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + ip6_forward_next_trace_t * t0; + + bi0 = from[0]; + + b0 = vlib_get_buffer (vm, bi0); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; + t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; + memcpy (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); + } + from += 1; + n_left -= 1; + } +} + +static uword +ip6_drop_or_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + ip6_error_t error_code) +{ + u32 * buffers = vlib_frame_vector_args (frame); + uword n_packets = frame->n_vectors; + + vlib_error_drop_buffers (vm, node, + buffers, + /* stride */ 1, + n_packets, + /* next */ 0, + ip6_input_node.index, + error_code); + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + + return n_packets; +} + +static uword +ip6_drop (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP); } + +static uword +ip6_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); } + +static uword +ip6_miss (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_DST_LOOKUP_MISS); } + +VLIB_REGISTER_NODE (ip6_drop_node,static) = { + .function = ip6_drop, + .name = "ip6-drop", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ip6_punt_node,static) = { + .function = ip6_punt, + .name = "ip6-punt", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-punt", + }, +}; + +VLIB_REGISTER_NODE (ip6_miss_node,static) = { + .function = ip6_miss, + .name = "ip6-miss", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ip6_multicast_node,static) = { + .function = ip6_drop, + .name = "ip6-multicast", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +/* Compute TCP/UDP/ICMP6 checksum in software. 
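The sum starts from the IPv6 pseudo-header (source and destination addresses, payload
length, next header), optionally skips one leading hop-by-hop extension header, and then
walks the payload across the buffer chain (or the mbuf chain when DPDK is enabled).
*bogus_lengthp is set when the stated payload length cannot be satisfied by the
available data, in which case the dummy value 0xfefe is returned.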
*/ +u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp) +{ + ip_csum_t sum0; + u16 sum16, payload_length_host_byte_order; + u32 i, n_this_buffer, n_bytes_left; + u32 headers_size = sizeof(ip0[0]); + void * data_this_buffer; + + ASSERT(bogus_lengthp); + *bogus_lengthp = 0; + + /* Initialize checksum with ip header. */ + sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol); + payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length); + data_this_buffer = (void *) (ip0 + 1); + + for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++) + { + sum0 = ip_csum_with_carry (sum0, + clib_mem_unaligned (&ip0->src_address.as_uword[i], uword)); + sum0 = ip_csum_with_carry (sum0, + clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword)); + } + + /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */ + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + u32 skip_bytes; + ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *)data_this_buffer; + + /* validate really icmp6 next */ + ASSERT(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6); + + skip_bytes = 8* (1 + ext_hdr->n_data_u64s); + data_this_buffer = (void *)((u8 *)data_this_buffer + skip_bytes); + + payload_length_host_byte_order -= skip_bytes; + headers_size += skip_bytes; + } + + n_bytes_left = n_this_buffer = payload_length_host_byte_order; +#if DPDK > 0 + if (p0) + { + struct rte_mbuf *mb = ((struct rte_mbuf *)p0)-1; + u8 nb_segs = mb->nb_segs; + + n_this_buffer = (p0->current_length > headers_size ? + p0->current_length - headers_size : 0); + while (n_bytes_left) + { + sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + + mb = mb->next; + nb_segs--; + if ((nb_segs == 0) || (mb == 0)) + break; + + data_this_buffer = rte_ctrlmbuf_data(mb); + n_this_buffer = mb->data_len; + } + if (n_bytes_left || nb_segs) + { + *bogus_lengthp = 1; + return 0xfefe; + } + } + else sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); +#else + if (p0 && n_this_buffer + headers_size > p0->current_length) + n_this_buffer = p0->current_length > headers_size ? 
p0->current_length - headers_size : 0; + while (1) + { + sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + *bogus_lengthp = 1; + return 0xfefe; + } + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = p0->current_length; + } +#endif /* DPDK */ + + sum16 = ~ ip_csum_fold (sum0); + + return sum16; +} + +u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) +{ + ip6_header_t * ip0 = vlib_buffer_get_current (p0); + udp_header_t * udp0; + u16 sum16; + int bogus_length; + + /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */ + ASSERT (ip0->protocol == IP_PROTOCOL_TCP + || ip0->protocol == IP_PROTOCOL_ICMP6 + || ip0->protocol == IP_PROTOCOL_UDP + || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS); + + udp0 = (void *) (ip0 + 1); + if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0) + { + p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED + | IP_BUFFER_L4_CHECKSUM_CORRECT); + return p0->flags; + } + + sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length); + + p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED + | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT)); + + return p0->flags; +} + +static uword +ip6_local (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip_local_next_t next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + ip6_header_t * ip0, * ip1; + udp_header_t * udp0, * udp1; + u32 pi0, ip_len0, udp_len0, flags0, next0; + u32 pi1, ip_len1, udp_len1, flags1, next1; + i32 len_diff0, len_diff1; + u8 error0, type0, good_l4_checksum0; + u8 error1, type1, good_l4_checksum1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; + type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol]; + + next0 = lm->local_next_by_ip_protocol[ip0->protocol]; + next1 = lm->local_next_by_ip_protocol[ip1->protocol]; + + flags0 = p0->flags; + flags1 = p1->flags; + + good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + + udp0 = ip6_next_header (ip0); + udp1 = ip6_next_header (ip1); + + /* Don't verify UDP checksum for packets with explicit zero checksum. 
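A zero UDP checksum is accepted as valid here, as in the IPv4 path, even though RFC 2460
requires UDP checksums over IPv6; packets for unknown protocols are likewise passed
through without a checksum check.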
*/ + good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0; + good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0; + + good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; + good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN; + + /* Verify UDP length. */ + ip_len0 = clib_net_to_host_u16 (ip0->payload_length); + ip_len1 = clib_net_to_host_u16 (ip1->payload_length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + udp_len1 = clib_net_to_host_u16 (udp1->length); + + len_diff0 = ip_len0 - udp_len0; + len_diff1 = ip_len1 - udp_len1; + + len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; + len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0; + + if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN + && ! good_l4_checksum0 + && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))) + { + flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0); + good_l4_checksum0 = + (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + } + if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN + && ! good_l4_checksum1 + && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))) + { + flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1); + good_l4_checksum1 = + (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + } + + error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL; + + error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0; + error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1; + + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM); + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_TCP == IP6_ERROR_TCP_CHECKSUM); + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM); + error0 = (! good_l4_checksum0 + ? IP6_ERROR_UDP_CHECKSUM + type0 + : error0); + error1 = (! good_l4_checksum1 + ? IP6_ERROR_UDP_CHECKSUM + type1 + : error1); + + /* Drop packets from unroutable hosts. */ + /* If this is a neighbor solicitation (ICMP), skip source RPF check */ + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP) + { + u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); + error0 = (lm->miss_adj_index == src_adj_index0 + ? IP6_ERROR_SRC_LOOKUP_MISS + : error0); + } + if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && type1 != IP_BUILTIN_PROTOCOL_ICMP) + { + u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1); + error1 = (lm->miss_adj_index == src_adj_index1 + ? IP6_ERROR_SRC_LOOKUP_MISS + : error1); + } + + next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + next1 = error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1; + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + udp_header_t * udp0; + u32 pi0, ip_len0, udp_len0, flags0, next0; + i32 len_diff0; + u8 error0, type0, good_l4_checksum0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + + type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; + next0 = lm->local_next_by_ip_protocol[ip0->protocol]; + + flags0 = p0->flags; + + good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + + udp0 = ip6_next_header (ip0); + + /* Don't verify UDP checksum for packets with explicit zero checksum. 
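Same handling as in the dual-buffer loop above, applied to the remaining packets one
at a time.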
*/ + good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0; + + good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; + + /* Verify UDP length. */ + ip_len0 = clib_net_to_host_u16 (ip0->payload_length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + + len_diff0 = ip_len0 - udp_len0; + + len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; + + if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN + && ! good_l4_checksum0 + && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))) + { + flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0); + good_l4_checksum0 = + (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + } + + error0 = IP6_ERROR_UNKNOWN_PROTOCOL; + + error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0; + + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM); + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_TCP == IP6_ERROR_TCP_CHECKSUM); + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM); + error0 = (! good_l4_checksum0 + ? IP6_ERROR_UDP_CHECKSUM + type0 + : error0); + + /* If this is a neighbor solicitation (ICMP), skip source RPF check */ + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP) + { + u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); + error0 = (lm->miss_adj_index == src_adj_index0 + ? IP6_ERROR_SRC_LOOKUP_MISS + : error0); + } + + next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + + p0->error = error_node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_local_node,static) = { + .function = ip6_local, + .name = "ip6-local", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_next_nodes = IP_LOCAL_N_NEXT, + .next_nodes = { + [IP_LOCAL_NEXT_DROP] = "error-drop", + [IP_LOCAL_NEXT_PUNT] = "error-punt", + // [IP_LOCAL_NEXT_TCP_LOOKUP] = "ip6-tcp-lookup", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input", + }, +}; + +void ip6_register_protocol (u32 protocol, u32 node_index) +{ + vlib_main_t * vm = vlib_get_main(); + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + + ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol)); + lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip6_local_node.index, node_index); +} + +typedef enum { + IP6_DISCOVER_NEIGHBOR_NEXT_DROP, + IP6_DISCOVER_NEIGHBOR_N_NEXT, +} ip6_discover_neighbor_next_t; + +typedef enum { + IP6_DISCOVER_NEIGHBOR_ERROR_DROP, + IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT, +} ip6_discover_neighbor_error_t; + +static uword +ip6_discover_neighbor (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 * from, * to_next_drop; + uword n_left_from, n_left_to_next_drop; + static f64 time_last_seed_change = -1e100; + static u32 hash_seeds[3]; + static uword hash_bitmap[256 / BITS (uword)]; + f64 time_now; + int bogus_length; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + + time_now = vlib_time_now (vm); + if (time_now - time_last_seed_change > 1e-3) + { + uword i; + u32 * r = clib_random_buffer_get_data (&vm->random_buffer, + sizeof (hash_seeds)); + 
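/* Fresh seeds (and, below, a cleared bitmap) start a new suppression interval:
   within one interval at most one neighbor solicitation is generated per
   (interface, destination) hash bucket; further packets hitting the same bucket
   are counted as drops. */ +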
for (i = 0; i < ARRAY_LEN (hash_seeds); i++) + hash_seeds[i] = r[i]; + + /* Mark all hash keys as been not-seen before. */ + for (i = 0; i < ARRAY_LEN (hash_bitmap); i++) + hash_bitmap[i] = 0; + + time_last_seed_change = time_now; + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, + to_next_drop, n_left_to_next_drop); + + while (n_left_from > 0 && n_left_to_next_drop > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; + uword bm0; + ip_adjacency_t * adj0; + vnet_hw_interface_t * hw_if0; + u32 next0; + + pi0 = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + ip0 = vlib_buffer_get_current (p0); + + adj0 = ip_get_adjacency (lm, adj_index0); + + a0 = hash_seeds[0]; + b0 = hash_seeds[1]; + c0 = hash_seeds[2]; + + sw_if_index0 = adj0->rewrite_header.sw_if_index; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + + a0 ^= sw_if_index0; + b0 ^= ip0->dst_address.as_u32[0]; + c0 ^= ip0->dst_address.as_u32[1]; + + hash_v3_mix32 (a0, b0, c0); + + b0 ^= ip0->dst_address.as_u32[2]; + c0 ^= ip0->dst_address.as_u32[3]; + + hash_v3_finalize32 (a0, b0, c0); + + c0 &= BITS (hash_bitmap) - 1; + c0 = c0 / BITS (uword); + m0 = (uword) 1 << (c0 % BITS (uword)); + + bm0 = hash_bitmap[c0]; + drop0 = (bm0 & m0) != 0; + + /* Mark it as seen. */ + hash_bitmap[c0] = bm0 | m0; + + from += 1; + n_left_from -= 1; + to_next_drop[0] = pi0; + to_next_drop += 1; + n_left_to_next_drop -= 1; + + hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + /* If the interface is link-down, drop the pkt */ + if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + drop0 = 1; + + p0->error = + node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP + : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]; + if (drop0) + continue; + + { + u32 bi0 = 0; + icmp6_neighbor_solicitation_header_t * h0; + vlib_buffer_t * b0; + + h0 = vlib_packet_template_get_packet + (vm, &im->discover_neighbor_packet_template, &bi0); + + /* + * Build ethernet header. + * Choose source address based on destination lookup + * adjacency. + */ + ip6_src_address_for_packet (im, p0, &h0->ip.src_address, + sw_if_index0); + + /* + * Destination address is a solicited node multicast address. + * We need to fill in + * the low 24 bits with low 24 bits of target's address. + */ + h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13]; + h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14]; + h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15]; + + h0->neighbor.target_address = ip0->dst_address; + + memcpy (h0->link_layer_option.ethernet_address, + hw_if0->hw_address, vec_len (hw_if0->hw_address)); + + /* $$$$ appears we need this; why is the checksum non-zero? */ + h0->neighbor.icmp.checksum = 0; + h0->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip, + &bogus_length); + + ASSERT (bogus_length == 0); + + vlib_buffer_copy_trace_flag (vm, p0, bi0); + b0 = vlib_get_buffer (vm, bi0); + vnet_buffer (b0)->sw_if_index[VLIB_TX] + = vnet_buffer (p0)->sw_if_index[VLIB_TX]; + + /* Add rewrite/encap string. 
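The adjacency's precomputed L2 header is copied in front of the solicitation and the
buffer is advanced back over it, so the packet can go straight to the interface output
node selected below.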
*/ + vnet_rewrite_one_header (adj0[0], h0, + sizeof (ethernet_header_t)); + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + + /* $$$$ hack in case next0 == 0 */ + b0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_DROP]; + next0 = + vec_elt (im->discover_neighbor_next_index_by_hw_if_index, + hw_if0->hw_if_index); + + vlib_set_next_frame_buffer (vm, node, next0, bi0); + } + } + + vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, + n_left_to_next_drop); + } + + return frame->n_vectors; +} + +static char * ip6_discover_neighbor_error_strings[] = { + [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops", + [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] + = "neighbor solicitations sent", +}; + +VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = { + .function = ip6_discover_neighbor, + .name = "ip6-discover-neighbor", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings), + .error_strings = ip6_discover_neighbor_error_strings, + + .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, + .next_nodes = { + [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t * +ip6_discover_neighbor_hw_interface_link_up_down (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + vlib_main_t * vm = vnm->vlib_main; + ip6_main_t * im = &ip6_main; + vnet_hw_interface_t * hw_if; + + hw_if = vnet_get_hw_interface (vnm, hw_if_index); + + vec_validate_init_empty + (im->discover_neighbor_next_index_by_hw_if_index, hw_if_index, 0); + im->discover_neighbor_next_index_by_hw_if_index[hw_if_index] + = vlib_node_add_next (vm, ip6_discover_neighbor_node.index, + hw_if->output_node_index); + return 0; +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION +(ip6_discover_neighbor_hw_interface_link_up_down); + +clib_error_t * +ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + icmp6_neighbor_solicitation_header_t * h; + ip6_address_t * src; + ip_interface_address_t * ia; + ip_adjacency_t * adj; + vnet_hw_interface_t * hi; + vnet_sw_interface_t * si; + vlib_buffer_t * b; + u32 bi = 0; + int bogus_length; + + si = vnet_get_sw_interface (vnm, sw_if_index); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + return clib_error_return (0, "%U: interface %U down", + format_ip6_address, dst, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + } + + src = ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia); + if (! src) + { + vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE; + return clib_error_return + (0, "no matching interface address for destination %U (interface %U)", + format_ip6_address, dst, + format_vnet_sw_if_index_name, vnm, sw_if_index); + } + + h = vlib_packet_template_get_packet (vm, &im->discover_neighbor_packet_template, &bi); + + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + + /* Destination address is a solicited node multicast address. We need to fill in + the low 24 bits with low 24 bits of target's address. 
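The packet template already carries the ff02::1:ff00:0 base, so copying the last three
bytes of the target below yields the standard solicited-node group ff02::1:ffXX:XXXX.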
*/ + h->ip.dst_address.as_u8[13] = dst->as_u8[13]; + h->ip.dst_address.as_u8[14] = dst->as_u8[14]; + h->ip.dst_address.as_u8[15] = dst->as_u8[15]; + + h->ip.src_address = src[0]; + h->neighbor.target_address = dst[0]; + + memcpy (h->link_layer_option.ethernet_address, hi->hw_address, vec_len (hi->hw_address)); + + h->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length); + ASSERT(bogus_length == 0); + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). */ + adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index); + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 * to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } + + return /* no error */ 0; +} + +typedef enum { + IP6_REWRITE_NEXT_DROP, +} ip6_rewrite_next_t; + +always_inline uword +ip6_rewrite_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int rewrite_for_locally_received_packets) +{ + ip_lookup_main_t * lm = &ip6_main.lookup_main; + u32 * from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, * to_next, next_index; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index); + vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX; + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + ip_adjacency_t * adj0, * adj1; + vlib_buffer_t * p0, * p1; + ip6_header_t * ip0, * ip1; + u32 pi0, rw_len0, next0, error0, adj_index0; + u32 pi1, rw_len1, next1, error1, adj_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->pre_data, 32, STORE); + CLIB_PREFETCH (p3->pre_data, 32, STORE); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0 && adj_index1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + error0 = error1 = IP6_ERROR_NONE; + + if (! rewrite_for_locally_received_packets) + { + i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit; + + /* Input node should have reject packets with hop limit 0. */ + ASSERT (ip0->hop_limit > 0); + ASSERT (ip1->hop_limit > 0); + + hop_limit0 -= 1; + hop_limit1 -= 1; + + ip0->hop_limit = hop_limit0; + ip1->hop_limit = hop_limit1; + + error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0; + error1 = hop_limit1 <= 0 ? 
IP6_ERROR_TIME_EXPIRED : error1; + } + + adj0 = ip_get_adjacency (lm, adj_index0); + adj1 = ip_get_adjacency (lm, adj_index1); + + if (rewrite_for_locally_received_packets) + { + /* + * If someone sends e.g. an icmp6 w/ src = dst = interface addr, + * we end up here with a local adjacency in hand + */ + if (PREDICT_FALSE(adj0->lookup_next_index + == IP_LOOKUP_NEXT_LOCAL)) + error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; + if (PREDICT_FALSE(adj1->lookup_next_index + == IP_LOOKUP_NEXT_LOCAL)) + error1 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; + } + + rw_len0 = adj0[0].rewrite_header.data_bytes; + rw_len1 = adj1[0].rewrite_header.data_bytes; + + vlib_increment_combined_counter (&lm->adjacency_counters, + cpu_index, + adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0); + vlib_increment_combined_counter (&lm->adjacency_counters, + cpu_index, + adj_index1, + /* packet increment */ 0, + /* byte increment */ rw_len1); + + /* Check MTU of outgoing interface. */ + error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes + ? IP6_ERROR_MTU_EXCEEDED + : error0); + error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes + ? IP6_ERROR_MTU_EXCEEDED + : error1); + + p0->current_data -= rw_len0; + p1->current_data -= rw_len1; + + p0->current_length += rw_len0; + p1->current_length += rw_len1; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index; + + next0 = (error0 == IP6_ERROR_NONE) ? + adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP; + next1 = (error1 == IP6_ERROR_NONE) ? + adj1[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP; + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, + sizeof (ethernet_header_t)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + ip6_header_t * ip0; + u32 pi0, rw_len0; + u32 adj_index0, next0, error0; + + pi0 = to_next[0] = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0); + + adj0 = ip_get_adjacency (lm, adj_index0); + + ip0 = vlib_buffer_get_current (p0); + + error0 = IP6_ERROR_NONE; + + /* Check hop limit */ + if (! rewrite_for_locally_received_packets) + { + i32 hop_limit0 = ip0->hop_limit; + + ASSERT (ip0->hop_limit > 0); + + hop_limit0 -= 1; + + ip0->hop_limit = hop_limit0; + + error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0; + } + + if (rewrite_for_locally_received_packets) + { + if (PREDICT_FALSE(adj0->lookup_next_index + == IP_LOOKUP_NEXT_LOCAL)) + error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; + } + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + + vlib_increment_combined_counter (&lm->adjacency_counters, + cpu_index, + adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0); + + /* Check MTU of outgoing interface. */ + error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes + ? 
IP6_ERROR_MTU_EXCEEDED + : error0); + + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index; + + next0 = (error0 == IP6_ERROR_NONE) ? + adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP; + + p0->error = error_node->errors[error0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Need to do trace after rewrites to pick up new packet data. */ + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, adj_rx_tx); + + return frame->n_vectors; +} + +static uword +ip6_rewrite_transit (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip6_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 0); +} + +static uword +ip6_rewrite_local (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip6_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 1); +} + +VLIB_REGISTER_NODE (ip6_rewrite_node) = { + .function = ip6_rewrite_transit, + .name = "ip6-rewrite", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [IP6_REWRITE_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ip6_rewrite_local_node,static) = { + .function = ip6_rewrite_local, + .name = "ip6-rewrite-local", + .vector_size = sizeof (u32), + + .sibling_of = "ip6-rewrite", + + .format_trace = format_ip6_forward_next_trace, + + .n_next_nodes = 1, + .next_nodes = { + [IP6_REWRITE_NEXT_DROP] = "error-drop", + }, +}; + +/* Global IP6 main. */ +ip6_main_t ip6_main; + +static clib_error_t * +ip6_lookup_init (vlib_main_t * vm) +{ + ip6_main_t * im = &ip6_main; + uword i; + + for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) + { + u32 j, i0, i1; + + i0 = i / 32; + i1 = i % 32; + + for (j = 0; j < i0; j++) + im->fib_masks[i].as_u32[j] = ~0; + + if (i1) + im->fib_masks[i].as_u32[i0] = clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1)); + } + + ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1); + + if (im->lookup_table_nbuckets == 0) + im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS; + + im->lookup_table_nbuckets = 1<< max_log2 (im->lookup_table_nbuckets); + + if (im->lookup_table_size == 0) + im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE; + + BV(clib_bihash_init) (&im->ip6_lookup_table, "ip6 lookup table", + im->lookup_table_nbuckets, + im->lookup_table_size); + + /* Create FIB with index 0 and table id of 0. 
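Looking the table up by id with IP6_ROUTE_FLAG_TABLE_ID creates it on first use, so the
default FIB exists before any routes or interface addresses are configured. All tables
share the single bihash initialized above; each entry is keyed by the masked destination
address together with ((u64) fib_index << 32) | prefix_length.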
*/ + find_ip6_fib_by_table_index_or_id (im, /* table id */ 0, IP6_ROUTE_FLAG_TABLE_ID); + + { + pg_node_t * pn; + pn = pg_get_node (ip6_lookup_node.index); + pn->unformat_edit = unformat_pg_ip6_header; + } + + { + icmp6_neighbor_solicitation_header_t p; + + memset (&p, 0, sizeof (p)); + + p.ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28); + p.ip.payload_length = clib_host_to_net_u16 (sizeof (p) + - STRUCT_OFFSET_OF (icmp6_neighbor_solicitation_header_t, neighbor)); + p.ip.protocol = IP_PROTOCOL_ICMP6; + p.ip.hop_limit = 255; + ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0); + + p.neighbor.icmp.type = ICMP6_neighbor_solicitation; + + p.link_layer_option.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address; + p.link_layer_option.header.n_data_u64s = sizeof (p.link_layer_option) / sizeof (u64); + + vlib_packet_template_init (vm, + &im->discover_neighbor_packet_template, + &p, sizeof (p), + /* alloc chunk size */ 8, + "ip6 neighbor discovery"); + } + + return 0; +} + +VLIB_INIT_FUNCTION (ip6_lookup_init); + +static clib_error_t * +add_del_ip6_interface_table (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index, table_id; + + sw_if_index = ~0; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "%d", &table_id)) + ; + else + { + error = clib_error_return (0, "expected table id `%U'", + format_unformat_error, input); + goto done; + } + + { + ip6_main_t * im = &ip6_main; + ip6_fib_t * fib = + find_ip6_fib_by_table_index_or_id (im, table_id, IP6_ROUTE_FLAG_TABLE_ID); + + if (fib) + { + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + im->fib_index_by_sw_if_index[sw_if_index] = fib->index; + } + } + + done: + return error; +} + +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = { + .path = "set interface ip6 table", + .function = add_del_ip6_interface_table, + .short_help = "set interface ip6 table <intfc> <table-id>" +}; + +void +ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip, + u8 *mac) +{ + ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); + /* Invert the "u" bit */ + ip->as_u8 [8] = mac[0] ^ (1<<1); + ip->as_u8 [9] = mac[1]; + ip->as_u8 [10] = mac[2]; + ip->as_u8 [11] = 0xFF; + ip->as_u8 [12] = 0xFE; + ip->as_u8 [13] = mac[3]; + ip->as_u8 [14] = mac[4]; + ip->as_u8 [15] = mac[5]; +} + +void +ip6_ethernet_mac_address_from_link_local_address (u8 *mac, + ip6_address_t *ip) +{ + /* Invert the previously inverted "u" bit */ + mac[0] = ip->as_u8 [8] ^ (1<<1); + mac[1] = ip->as_u8 [9]; + mac[2] = ip->as_u8 [10]; + mac[3] = ip->as_u8 [13]; + mac[4] = ip->as_u8 [14]; + mac[5] = ip->as_u8 [15]; +} + +static clib_error_t * +test_ip6_link_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 mac[6]; + ip6_address_t _a, *a = &_a; + + if (unformat (input, "%U", unformat_ethernet_address, mac)) + { + ip6_link_local_address_from_ethernet_mac_address (a, mac); + vlib_cli_output (vm, "Link local address: %U", + format_ip6_address, a); + ip6_ethernet_mac_address_from_link_local_address (mac, a); + vlib_cli_output (vm, "Original MAC address: %U", + format_ethernet_address, mac); + } + + return 0; +} + +VLIB_CLI_COMMAND (test_link_command, static) = { + .path = "test ip6 link", + 
.function = test_ip6_link_command_fn, + .short_help = "test ip6 link <mac-address>", +}; + +int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) +{ + ip6_main_t * im6 = &ip6_main; + ip6_fib_t * fib; + uword * p = hash_get (im6->fib_index_by_table_id, table_id); + + if (p == 0) + return -1; + + fib = vec_elt_at_index (im6->fibs, p[0]); + + fib->flow_hash_config = flow_hash_config; + return 1; +} + +static clib_error_t * +set_ip6_flow_hash_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int matched = 0; + u32 table_id = 0; + u32 flow_hash_config = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "table %d", &table_id)) + matched = 1; +#define _(a,v) \ + else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;} + foreach_flow_hash_bit +#undef _ + else break; + } + + if (matched == 0) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config); + switch (rv) + { + case 1: + break; + + case -1: + return clib_error_return (0, "no such FIB table %d", table_id); + + default: + clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config); + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = { + .path = "set ip6 flow-hash", + .short_help = + "set ip table flow-hash table <fib-id> src dst sport dport proto reverse", + .function = set_ip6_flow_hash_command_fn, +}; + +static clib_error_t * +show_ip6_local_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + int i; + + vlib_cli_output (vm, "Protocols handled by ip6_local"); + for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++) + { + if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT) + vlib_cli_output (vm, "%d", i); + } + return 0; +} + + + +VLIB_CLI_COMMAND (show_ip_local, static) = { + .path = "show ip6 local", + .function = show_ip6_local_command_fn, + .short_help = "Show ip6 local protocol table", +}; + +int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 table_index) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + ip6_main_t * ipm = &ip6_main; + ip_lookup_main_t * lm = &ipm->lookup_main; + vnet_classify_main_t * cm = &vnet_classify_main; + + if (pool_is_free_index (im->sw_interfaces, sw_if_index)) + return VNET_API_ERROR_NO_MATCHING_INTERFACE; + + if (table_index != ~0 && pool_is_free_index (cm->tables, table_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index); + lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index; + + return 0; +} + +static clib_error_t * +set_ip6_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 table_index = ~0; + int table_index_set = 0; + u32 sw_if_index = ~0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "table-index %d", &table_index)) + table_index_set = 1; + else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + vnet_get_main(), &sw_if_index)) + ; + else + break; + } + + if (table_index_set == 0) + return clib_error_return (0, "classify table-index must be specified"); + + if (sw_if_index == ~0) + return clib_error_return (0, "interface / subif must be specified"); + + rv = 
vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_MATCHING_INTERFACE: + return clib_error_return (0, "No such interface"); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "No such classifier table"); + } + return 0; +} + +VLIB_CLI_COMMAND (set_ip6_classify_command, static) = { + .path = "set ip6 classify", + .short_help = + "set ip6 classify intfc <int> table-index <index>", + .function = set_ip6_classify_command_fn, +}; + +static clib_error_t * +ip6_config (vlib_main_t * vm, unformat_input_t * input) +{ + ip6_main_t * im = &ip6_main; + uword heapsize = 0; + u32 tmp; + u32 nbuckets = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "hash-buckets %d", &tmp)) + nbuckets = tmp; + else if (unformat (input, "heap-size %dm", &tmp)) + heapsize = ((u64)tmp) << 20; + else if (unformat (input, "heap-size %dM", &tmp)) + heapsize = ((u64)tmp) << 20; + else if (unformat (input, "heap-size %dg", &tmp)) + heapsize = ((u64)tmp) << 30; + else if (unformat (input, "heap-size %dG", &tmp)) + heapsize = ((u64)tmp) << 30; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + + im->lookup_table_nbuckets = nbuckets; + im->lookup_table_size = heapsize; + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6"); + diff --git a/vnet/vnet/ip/ip6_hop_by_hop.c b/vnet/vnet/ip/ip6_hop_by_hop.c new file mode 100644 index 00000000000..64edfd249c3 --- /dev/null +++ b/vnet/vnet/ip/ip6_hop_by_hop.c @@ -0,0 +1,1139 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include <vnet/ip/ip.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#include <vnet/ip/ip6_hop_by_hop.h> + +ip6_hop_by_hop_main_t ip6_hop_by_hop_main; + +/* + * ip6 hop-by-hop option handling. We push pkts with h-b-h options to + * ip6_hop_by_hop_node_fn from ip6-lookup at a cost of ~2 clocks/pkt in + * the speed path. + * + * We parse through the h-b-h option TLVs, specifically looking for + * HBH_OPTION_TYPE_IOAM_DATA_LIST. [Someone needs to get bananas from + * IANA, aka to actually allocate the option TLV codes.] + * + * If we find the indicated option type, and we have remaining list + * elements in the trace list, allocate and populate the trace list + * element. + * + * At the ingress edge: punch in the h-b-h rewrite, then visit the + * standard h-b-h option handler. We have to be careful in the standard + * h-b-h handler, to avoid looping until we run out of rewrite space. + * Ask me how I know that. + * + * Remaining work: + * decide on egress point "pop and count" scheme + * time stamp handling: usec since the top of the hour? 
+ * configure the node id + * trace list application data support + * cons up analysis / steering plug-in(s) + * add configuration binary APIs, vpe_api_test_support, yang models and + * orca code + * perf tune: dual loop, replace memcpy w/ N x 8-byte load/stores + * + */ + +/* + * primary h-b-h handler trace support + * We work pretty hard on the problem for obvious reasons + */ +typedef struct { + u32 next_index; + u32 trace_len; + u8 option_data[256]; +} ip6_hop_by_hop_trace_t; + +static u8 * format_ioam_data_list_element (u8 * s, va_list * args) +{ + ioam_data_list_element_t *elt = va_arg (*args, ioam_data_list_element_t *); + u32 ttl_node_id_host_byte_order = + clib_net_to_host_u32 (elt->ttl_node_id); + + s = format (s, "ttl %d node id %d ingress %d egress %d ts %u", + ttl_node_id_host_byte_order>>24, + ttl_node_id_host_byte_order & 0x00FFFFFF, + elt->ingress_if, + elt->egress_if, + elt->timestamp); + return s; +} + +static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *); + ip6_hop_by_hop_header_t *hbh0; + ip6_hop_by_hop_option_t *opt0, *limit0; + ioam_trace_option_t * trace0; + ioam_data_list_element_t * elt0; + int elt_index; + u8 type0; + + hbh0 = (ip6_hop_by_hop_header_t *)t->option_data; + + s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d\n", + t->next_index, (hbh0->length+1)<<3, t->trace_len); + + opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1); + limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len; + + while (opt0 < limit0) + { + type0 = opt0->type & HBH_OPTION_TYPE_MASK; + elt_index = 0; + switch (type0) + { + case HBH_OPTION_TYPE_IOAM_DATA_LIST: + trace0 = (ioam_trace_option_t *)opt0; + s = format (s, " Trace %d elts left\n", + trace0->data_list_elts_left); + elt0 = &trace0->elts[0]; + while ((u8 *) elt0 < + ((u8 *)(&trace0->elts[0]) + trace0->hdr.length - 1 + /* -1 accounts for elts_left */)) + { + s = format (s, " [%d] %U\n",elt_index, + format_ioam_data_list_element, elt0); + elt_index++; + elt0++; + } + + opt0 = (ip6_hop_by_hop_option_t *) + (((u8 *)opt0) + opt0->length + + sizeof (ip6_hop_by_hop_option_t)); + break; + + case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK: + s = format (s, " POW opt present\n"); + opt0 = (ip6_hop_by_hop_option_t *) + (((u8 *)opt0) + sizeof (ioam_pow_option_t)); + break; + + case 0: /* Pad, just stop */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1; + break; + + default: + s = format (s, "Unknown %d", type0); + opt0 = (ip6_hop_by_hop_option_t *) + (((u8 *)opt0) + opt0->length + + sizeof (ip6_hop_by_hop_option_t)); + break; + } + } + return s; +} + +vlib_node_registration_t ip6_hop_by_hop_node; + +#define foreach_ip6_hop_by_hop_error \ +_(PROCESSED, "Pkts with ip6 hop-by-hop options") + +typedef enum { +#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym, + foreach_ip6_hop_by_hop_error +#undef _ + IP6_HOP_BY_HOP_N_ERROR, +} ip6_hop_by_hop_error_t; + +static char * ip6_hop_by_hop_error_strings[] = { +#define _(sym,string) string, + foreach_ip6_hop_by_hop_error +#undef _ +}; + +static uword +ip6_hop_by_hop_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + u32 n_left_from, * from, * to_next; + ip_lookup_next_t next_index; + u32 processed = 0; + + from = 
vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 /* $$$ DUAL-LOOP ME */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 next0 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; + u32 next1 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; + u32 sw_if_index0, sw_if_index1; + u8 tmp0[6], tmp1[6]; + ethernet_header_t *en0, *en1; + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* $$$$$ Dual loop: process 2 x packets here $$$$$ */ + ASSERT (b0->current_data == 0); + ASSERT (b1->current_data == 0); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* $$$$$ End of processing 2 x packets $$$$$ */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 adj_index0; + ip6_header_t * ip0; + ip_adjacency_t * adj0; + ip6_hop_by_hop_header_t *hbh0; + ip6_hop_by_hop_option_t *opt0, *limit0; + ioam_trace_option_t * trace0; + ioam_data_list_element_t * elt0; + u8 type0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = ip_get_adjacency (lm, adj_index0); + hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1); + opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1); + limit0 = (ip6_hop_by_hop_option_t *) + ((u8 *)hbh0 + ((hbh0->length+1)<<3)); + + /* Scan the set of h-b-h options, process ones that we understand */ + while (opt0 < limit0) + { + type0 = opt0->type & HBH_OPTION_TYPE_MASK; + switch (type0) + { + case HBH_OPTION_TYPE_IOAM_DATA_LIST: + trace0 = (ioam_trace_option_t *)opt0; + if (PREDICT_TRUE (trace0->data_list_elts_left)) + { + trace0->data_list_elts_left--; + elt0 = &trace0->elts[trace0->data_list_elts_left]; + elt0->ttl_node_id = + clib_host_to_net_u32 ((ip0->hop_limit<<24) + | hm->node_id); + elt0->ingress_if = + vnet_buffer(b0)->sw_if_index[VLIB_RX]; + 
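/* egress_if is taken from the rewrite header of the adjacency selected for this packet; ingress_if above came from the RX sw_if_index */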
elt0->egress_if = adj0->rewrite_header.sw_if_index; + elt0->timestamp = 123; /* $$$$ */ + /* $$$ set elt0->app_data */ + } + + opt0 = (ip6_hop_by_hop_option_t *) + (((u8 *)opt0) + opt0->length + + sizeof (ip6_hop_by_hop_option_t)); + break; + + case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK: + opt0 = (ip6_hop_by_hop_option_t *) + (((u8 *)opt0) + sizeof (ioam_pow_option_t)); + break; + + case 0: /* Pad */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1; + goto out0; + } + } + + out0: + + /* + * Since we push pkts here from the h-b-h header imposition code + * we have to be careful what we wish for... + */ + next0 = adj0->lookup_next_index != IP_LOOKUP_NEXT_ADD_HOP_BY_HOP ? + adj0->lookup_next_index : adj0->saved_lookup_next_index; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip6_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + u32 trace_len = (hbh0->length+1)<<3; + t->next_index = next0; + /* Capture the h-b-h option verbatim */ + trace_len = trace_len < ARRAY_LEN(t->option_data) ? + trace_len : ARRAY_LEN(t->option_data); + t->trace_len = trace_len; + memcpy (t->option_data, hbh0, trace_len); + } + + processed++; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index, + IP6_HOP_BY_HOP_ERROR_PROCESSED, processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = { + .function = ip6_hop_by_hop_node_fn, + .name = "ip6-hop-by-hop", + .vector_size = sizeof (u32), + .format_trace = format_ip6_hop_by_hop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings), + .error_strings = ip6_hop_by_hop_error_strings, + + /* See ip/lookup.h */ + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip6-miss", + [IP_LOOKUP_NEXT_DROP] = "ip6-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", + [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", + [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", + [IP_LOOKUP_NEXT_MAP] = "ip6-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd", + /* Next 3 arcs probably never used */ + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", + }, +}; + +/* The main h-b-h tracer will be invoked, no need to do much here */ +typedef struct { + u32 next_index; +} ip6_add_hop_by_hop_trace_t; + +/* packet trace format function */ +static u8 * format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_add_hop_by_hop_trace_t * t = va_arg (*args, + ip6_add_hop_by_hop_trace_t *); + + s = format (s, "IP6_ADD_HOP_BY_HOP: next index %d", + t->next_index); + return s; +} + +vlib_node_registration_t ip6_add_hop_by_hop_node; + +#define foreach_ip6_add_hop_by_hop_error \ +_(PROCESSED, "Pkts w/ added ip6 hop-by-hop options") + +typedef enum { +#define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym, + foreach_ip6_add_hop_by_hop_error +#undef _ + IP6_ADD_HOP_BY_HOP_N_ERROR, +} ip6_add_hop_by_hop_error_t; + +static char * 
ip6_add_hop_by_hop_error_strings[] = { +#define _(sym,string) string, + foreach_ip6_add_hop_by_hop_error +#undef _ +}; + +static uword +ip6_add_hop_by_hop_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + u32 n_left_from, * from, * to_next; + ip_lookup_next_t next_index; + u32 processed = 0; + u8 * rewrite = hm->rewrite; + u32 rewrite_length = vec_len (rewrite); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 next0 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; + u32 next1 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; + u32 sw_if_index0, sw_if_index1; + u8 tmp0[6], tmp1[6]; + ethernet_header_t *en0, *en1; + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* $$$$$ Dual loop: process 2 x packets here $$$$$ */ + ASSERT (b0->current_data == 0); + ASSERT (b1->current_data == 0); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* $$$$$ End of processing 2 x packets $$$$$ */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_add_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_add_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + ip6_header_t * ip0; + ip6_hop_by_hop_header_t * hbh0; + u64 * copy_src0, * copy_dst0; + u16 new_l0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + + /* Copy the ip header left by the required amount */ + copy_dst0 = (u64 *)(((u8 *)ip0) - rewrite_length); + copy_src0 = (u64 *) ip0; + + copy_dst0 [0] = copy_src0 [0]; + copy_dst0 [1] = copy_src0 [1]; + copy_dst0 [2] = copy_src0 [2]; + copy_dst0 [3] = copy_src0 [3]; + copy_dst0 [4] = copy_src0 [4]; + vlib_buffer_advance (b0, - (word)rewrite_length); + ip0 = vlib_buffer_get_current (b0); + + hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1); + /* $$$ tune, 
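an 8-byte-at-a-time copy would do here since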
rewrite_length is a multiple of 8 */ + memcpy (hbh0, rewrite, rewrite_length); + /* Patch the protocol chain, insert the h-b-h (type 0) header */ + hbh0->protocol = ip0->protocol; + ip0->protocol = 0; + new_l0 = clib_net_to_host_u16 (ip0->payload_length) + rewrite_length; + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + /* Populate the (first) h-b-h list elt */ + next0 = IP_LOOKUP_NEXT_HOP_BY_HOP; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip6_add_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + } + + processed++; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index, + IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = { + .function = ip6_add_hop_by_hop_node_fn, + .name = "ip6-add-hop-by-hop", + .vector_size = sizeof (u32), + .format_trace = format_ip6_add_hop_by_hop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ip6_add_hop_by_hop_error_strings), + .error_strings = ip6_add_hop_by_hop_error_strings, + + /* See ip/lookup.h */ + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip6-miss", + [IP_LOOKUP_NEXT_DROP] = "ip6-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", + [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", + [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", + [IP_LOOKUP_NEXT_MAP] = "ip6-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd", + /* Next 3 arcs probably never used */ + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", + }, +}; + + +/* The main h-b-h tracer was already invoked, no need to do much here */ +typedef struct { + u32 next_index; +} ip6_pop_hop_by_hop_trace_t; + +/* packet trace format function */ +static u8 * format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_pop_hop_by_hop_trace_t * t = va_arg (*args, ip6_pop_hop_by_hop_trace_t *); + + s = format (s, "IP6_POP_HOP_BY_HOP: next index %d", + t->next_index); + return s; +} + +vlib_node_registration_t ip6_pop_hop_by_hop_node; + +#define foreach_ip6_pop_hop_by_hop_error \ +_(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \ +_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options") + +typedef enum { +#define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym, + foreach_ip6_pop_hop_by_hop_error +#undef _ + IP6_POP_HOP_BY_HOP_N_ERROR, +} ip6_pop_hop_by_hop_error_t; + +static char * ip6_pop_hop_by_hop_error_strings[] = { +#define _(sym,string) string, + foreach_ip6_pop_hop_by_hop_error +#undef _ +}; + +static uword +ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 n_left_from, * from, * to_next; + ip_lookup_next_t next_index; + u32 processed = 0; + u32 no_header = 0; + u32 
(*ioam_end_of_path_cb) (vlib_main_t *, vlib_node_runtime_t *, + vlib_buffer_t *, ip6_header_t *, + ip_adjacency_t *); + + ioam_end_of_path_cb = hm->ioam_end_of_path_cb; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 next0 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; + u32 next1 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; + u32 sw_if_index0, sw_if_index1; + u8 tmp0[6], tmp1[6]; + ethernet_header_t *en0, *en1; + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* $$$$$ Dual loop: process 2 x packets here $$$$$ */ + ASSERT (b0->current_data == 0); + ASSERT (b1->current_data == 0); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* $$$$$ End of processing 2 x packets $$$$$ */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_pop_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_pop_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 adj_index0; + ip6_header_t * ip0; + ip_adjacency_t * adj0; + ip6_hop_by_hop_header_t *hbh0; + u64 * copy_dst0, * copy_src0; + u16 new_l0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = ip_get_adjacency (lm, adj_index0); + + /* Perfectly normal to end up here w/ out h-b-h header */ + if (PREDICT_TRUE (ip0->protocol == 0)) + { + hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1); + + /* Collect data from trace via callback */ + next0 = ioam_end_of_path_cb ? 
+ ioam_end_of_path_cb (vm, node, b0, ip0, adj0) + : adj0->saved_lookup_next_index; + + + /* Pop the trace data */ + vlib_buffer_advance (b0, (hbh0->length+1)<<3); + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - + ((hbh0->length+1)<<3); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->protocol = hbh0->protocol; + copy_src0 = (u64 *)ip0; + copy_dst0 = copy_src0 + (hbh0->length+1); + copy_dst0 [4] = copy_src0[4]; + copy_dst0 [3] = copy_src0[3]; + copy_dst0 [2] = copy_src0[2]; + copy_dst0 [1] = copy_src0[1]; + copy_dst0 [0] = copy_src0[0]; + processed++; + } + else + { + next0 = adj0->saved_lookup_next_index; + no_header++; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip6_pop_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index, + IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed); + vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index, + IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = { + .function = ip6_pop_hop_by_hop_node_fn, + .name = "ip6-pop-hop-by-hop", + .vector_size = sizeof (u32), + .format_trace = format_ip6_pop_hop_by_hop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ip6_pop_hop_by_hop_error_strings), + .error_strings = ip6_pop_hop_by_hop_error_strings, + + /* See ip/lookup.h */ + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = { + [IP_LOOKUP_NEXT_MISS] = "ip6-miss", + [IP_LOOKUP_NEXT_DROP] = "ip6-drop", + [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", + [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", + [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", + [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", + [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", + [IP_LOOKUP_NEXT_MAP] = "ip6-map", + [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", + [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd", + /* Next 3 arcs probably never used */ + [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", + [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", + [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", + }, +}; + + +static clib_error_t * +ip6_hop_by_hop_init (vlib_main_t * vm) +{ + ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + + hm->vlib_main = vm; + hm->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (ip6_hop_by_hop_init); + +int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_option_elts, int has_pow_option) +{ + u8 *rewrite = 0; + u32 size, rnd_size; + ip6_hop_by_hop_header_t *hbh; + ioam_trace_option_t * trace_option; + ioam_pow_option_t * pow_option; + u8 *current; + + vec_free (*rwp); + + if (trace_option_elts == 0 && has_pow_option == 0) + return 0; + + if (trace_option_elts * sizeof (ioam_data_list_element_t) > 254) + return VNET_API_ERROR_INVALID_VALUE; + + /* Work out how much space we need */ + size = sizeof (ip6_hop_by_hop_header_t); + + if (trace_option_elts) + { + size += sizeof (ip6_hop_by_hop_option_t); + size += trace_option_elts * (sizeof (ioam_data_list_element_t)); + } + if (has_pow_option) + { + size += sizeof (ip6_hop_by_hop_option_t); + size += sizeof (ioam_pow_option_t); + } + + /* Round to a multiple of 8 octets */ + rnd_size = (size + 7) & ~7; + + /* allocate it, 
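already rounded up to a multiple of 8 octets (e.g. 4 trace elts plus pow works out to 2 + (2 + 4*16) + (2 + 20) = 90, padded to 96);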
zero-fill / pad by construction */ + vec_validate (rewrite, rnd_size-1); + + hbh = (ip6_hop_by_hop_header_t *) rewrite; + /* Length of header in 8 octet units, not incl first 8 octets */ + hbh->length = (rnd_size>>3) - 1; + current = (u8 *)(hbh+1); + + if (trace_option_elts) + { + trace_option = (ioam_trace_option_t *)current; + trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_DATA_LIST + | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE; + trace_option->hdr.length = 1 /*data_list_elts_left */ + + trace_option_elts * sizeof (ioam_data_list_element_t); + trace_option->data_list_elts_left = trace_option_elts; + current += sizeof (ioam_trace_option_t) + + trace_option_elts * sizeof (ioam_data_list_element_t); + } + if (has_pow_option) + { + pow_option = (ioam_pow_option_t *)current; + pow_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK + | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE; + pow_option->hdr.length = sizeof (ioam_pow_option_t) - + sizeof (ip6_hop_by_hop_option_t); + current += sizeof (ioam_pow_option_t); + } + + *rwp = rewrite; + return 0; +} + +static clib_error_t * +ip6_ioam_set_rewrite_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + u32 trace_option_elts = 0; + int has_pow_option = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "trace-elts %d", &trace_option_elts)) + ; + else if (unformat (input, "pow")) + has_pow_option = 1; + else + break; + } + + rv = ip6_ioam_set_rewrite (&hm->rewrite, trace_option_elts, has_pow_option); + + switch (rv) + { + case 0: + break; + default: + return clib_error_return (0, "ip6_ioam_set_rewrite returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (ip6_ioam_set_rewrite_cmd, static) = { + .path = "ioam set rewrite", + .short_help = "ioam set rewrite [trace-elts <nn>] [pow]", + .function = ip6_ioam_set_rewrite_command_fn, +}; + +int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id, + int is_add, int is_pop, int is_none) +{ + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip_adjacency_t * adj; + u32 fib_index; + u32 len, adj_index; + int i, rv; + uword * p; + BVT(clib_bihash_kv) kv, value; + + if ((is_add + is_pop + is_none) != 1) + return VNET_API_ERROR_INVALID_VALUE_2; + + /* Go find the adjacency we're supposed to tickle */ + p = hash_get (im->fib_index_by_table_id, vrf_id); + + if (p == 0) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_index = p[0]; + + len = vec_len (im->prefix_lengths_in_search_order); + + for (i = 0; i < len; i++) + { + int dst_address_length = im->prefix_lengths_in_search_order[i]; + ip6_address_t * mask = &im->fib_masks[dst_address_length]; + + if (dst_address_length != mask_width) + continue; + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = ((u64)((fib_index))<<32) | dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value); + if (rv == 0) + goto found; + + } + return VNET_API_ERROR_NO_SUCH_ENTRY; + + found: + + /* Got it, modify as directed... 
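Restore any previously saved next index, then for add/pop save the current lookup_next_index and point the adjacency at the ip6-add-hop-by-hop / ip6-pop-hop-by-hop node, so traffic matching this destination detours through iOAM processing.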
*/ + adj_index = value.value; + adj = ip_get_adjacency (lm, adj_index); + + /* Restore original lookup-next action */ + if (adj->saved_lookup_next_index) + { + adj->lookup_next_index = adj->saved_lookup_next_index; + adj->saved_lookup_next_index = 0; + } + + /* Save current action */ + if (is_add || is_pop) + adj->saved_lookup_next_index = adj->lookup_next_index; + + if (is_add) + adj->lookup_next_index = IP_LOOKUP_NEXT_ADD_HOP_BY_HOP; + + if (is_pop) + adj->lookup_next_index = IP_LOOKUP_NEXT_POP_HOP_BY_HOP; + + return 0; +} + +static clib_error_t * +ip6_ioam_set_destination_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_address_t addr; + u32 mask_width = ~0; + int is_add = 0; + int is_pop = 0; + int is_none = 0; + u32 vrf_id = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U/%d", + unformat_ip6_address, &addr, &mask_width)) + ; + else if (unformat (input, "vrf-id %d", &vrf_id)) + ; + else if (unformat (input, "add")) + is_add = 1; + else if (unformat (input, "pop")) + is_pop = 1; + else if (unformat (input, "none")) + is_none = 1; + else + break; + } + + if ((is_add + is_pop + is_none) != 1) + return clib_error_return (0, "One of (add, pop, none) required"); + if (mask_width == ~0) + return clib_error_return (0, "<address>/<mask-width> required"); + + rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id, + is_add, is_pop, is_none); + + switch (rv) + { + case 0: + break; + default: + return clib_error_return (0, "ip6_ioam_set_destination returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (ip6_ioam_set_destination_cmd, static) = { + .path = "ioam set destination", + .short_help = "ioam set destination <ip6-address>/<width> add | pop | none", + .function = ip6_ioam_set_destination_command_fn, +}; + +void vnet_register_ioam_end_of_path_callback (void *cb) +{ + ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + + hm->ioam_end_of_path_cb = cb; +} + diff --git a/vnet/vnet/ip/ip6_hop_by_hop.h b/vnet/vnet/ip/ip6_hop_by_hop.h new file mode 100644 index 00000000000..82bafc5777b --- /dev/null +++ b/vnet/vnet/ip/ip6_hop_by_hop.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ip6_hop_by_hop_h__ +#define __included_ip6_hop_by_hop_h__ + +#include <vnet/ip/ip6_hop_by_hop_packet.h> + +typedef struct { + /* The current rewrite we're using */ + u8 * rewrite; + + /* Trace data processing callback */ + void *ioam_end_of_path_cb; + + /* Configured node-id */ + u32 node_id; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} ip6_hop_by_hop_main_t; + +#endif /* __included_ip6_hop_by_hop_h__ */ diff --git a/vnet/vnet/ip/ip6_hop_by_hop_packet.h b/vnet/vnet/ip/ip6_hop_by_hop_packet.h new file mode 100644 index 00000000000..a3d19035dae --- /dev/null +++ b/vnet/vnet/ip/ip6_hop_by_hop_packet.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ip6_hop_by_hop_packet_h__ +#define __included_ip6_hop_by_hop_packet_h__ + +typedef struct { + /* Protocol for next header */ + u8 protocol; + /* + * Length of hop_by_hop header in 8 octet units, + * not including the first 8 octets + */ + u8 length; +} ip6_hop_by_hop_header_t; + +typedef struct { + /* Option Type */ +#define HBH_OPTION_TYPE_SKIP_UNKNOWN (0x0 << 6) +#define HBH_OPTION_TYPE_DISCARD_UNKNOWN (0x1 << 6) +#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP (0x2 << 6) +#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST (0x3 << 6) +#define HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE (1<<5) +#define HBH_OPTION_TYPE_MASK (0x1F) + u8 type; + /* Length in octets of the option data field */ + u8 length; +} ip6_hop_by_hop_option_t; + +/* $$$$ IANA banana constants */ +#define HBH_OPTION_TYPE_IOAM_DATA_LIST 1 +#define HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK 2 + +typedef struct { + u32 ttl_node_id; + u16 ingress_if; + u16 egress_if; + u32 timestamp; + u32 app_data; +} ioam_data_list_element_t; + +typedef CLIB_PACKED(struct { + ip6_hop_by_hop_option_t hdr; + u8 data_list_elts_left; + ioam_data_list_element_t elts[0]; +}) ioam_trace_option_t; + +typedef CLIB_PACKED(struct { + ip6_hop_by_hop_option_t hdr; + u8 pow_type; + u8 reserved; + u32 random[2]; + u32 cumulative[2]; +}) ioam_pow_option_t; + +#endif /* __included_ip6_hop_by_hop_packet_h__ */ diff --git a/vnet/vnet/ip/ip6_input.c b/vnet/vnet/ip/ip6_input.c new file mode 100644 index 00000000000..ef8c7762625 --- /dev/null +++ b/vnet/vnet/ip/ip6_input.c @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip6_input.c: IP v6 input node + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ppp/ppp.h> +#include <vnet/hdlc/hdlc.h> + +typedef struct { + u8 packet_data[64]; +} ip6_input_trace_t; + +static u8 * format_ip6_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ip6_input_trace_t * t = va_arg (*va, ip6_input_trace_t *); + + s = format (s, "%U", + format_ip6_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + +typedef enum { + IP6_INPUT_NEXT_DROP, + IP6_INPUT_NEXT_LOOKUP, + IP6_INPUT_NEXT_TTL_EXPIRE, + IP6_INPUT_N_NEXT, +} ip6_input_next_t; + +/* Validate IP v6 packets and pass them either to forwarding code + or drop exception packets. */ +static uword +ip6_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 n_left_from, * from, * to_next; + ip6_input_next_t next_index; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index); + vlib_simple_counter_main_t * cm; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (ip6_input_trace_t)); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_IP6); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + ip6_header_t * ip0, * ip1; + ip_config_main_t * cm0, * cm1; + u32 pi0, sw_if_index0, next0; + u32 pi1, sw_if_index1, next1; + u8 error0, error1, cast0, cast1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); + } + + pi0 = from[0]; + pi1 = from[1]; + + to_next[0] = pi0; + to_next[1] = pi1; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + + cast0 = ip6_address_is_multicast (&ip0->dst_address) ? VNET_MULTICAST : VNET_UNICAST; + cast1 = ip6_address_is_multicast (&ip1->dst_address) ? 
VNET_MULTICAST : VNET_UNICAST; + + cm0 = lm->rx_config_mains + cast0; + cm1 = lm->rx_config_mains + cast1; + + vnet_buffer (p0)->ip.current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0); + vnet_buffer (p1)->ip.current_config_index = vec_elt (cm1->config_index_by_sw_if_index, sw_if_index1); + + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; + vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0; + + vnet_get_config_data (&cm0->config_main, + &vnet_buffer (p0)->ip.current_config_index, + &next0, + /* # bytes of config data */ 0); + vnet_get_config_data (&cm1->config_main, + &vnet_buffer (p1)->ip.current_config_index, + &next1, + /* # bytes of config data */ 0); + + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + + error0 = error1 = IP6_ERROR_NONE; + + /* Version != 6? Drop it. */ + error0 = (clib_net_to_host_u32 (ip0->ip_version_traffic_class_and_flow_label) >> 28) != 6 ? IP6_ERROR_VERSION : error0; + error1 = (clib_net_to_host_u32 (ip1->ip_version_traffic_class_and_flow_label) >> 28) != 6 ? IP6_ERROR_VERSION : error1; + + /* hop limit < 1? Drop it. for link-local broadcast packets, + * like dhcpv6 packets from client has hop-limit 1, which should not + * be dropped. + */ + error0 = ip0->hop_limit <= 1 ? IP6_ERROR_TIME_EXPIRED : error0; + error1 = ip1->hop_limit <= 1 ? IP6_ERROR_TIME_EXPIRED : error1; + + /* L2 length must be at least minimal IP header. */ + error0 = p0->current_length < sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0; + error1 = p1->current_length < sizeof (ip1[0]) ? IP6_ERROR_TOO_SHORT : error1; + + if (PREDICT_FALSE(error0 != IP6_ERROR_NONE)) + { + next0 = (error0 == IP6_ERROR_TIME_EXPIRED) ? + IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP; + } + if (PREDICT_FALSE(error1 != IP6_ERROR_NONE)) + { + next1 = (error1 == IP6_ERROR_TIME_EXPIRED) ? + IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP; + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + ip_config_main_t * cm0; + u32 pi0, sw_if_index0, next0; + u8 error0, cast0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip0 = vlib_buffer_get_current (p0); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + cast0 = ip6_address_is_multicast (&ip0->dst_address) ? VNET_MULTICAST : VNET_UNICAST; + cm0 = lm->rx_config_mains + cast0; + vnet_buffer (p0)->ip.current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0); + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; + + vnet_get_config_data (&cm0->config_main, + &vnet_buffer (p0)->ip.current_config_index, + &next0, + /* # bytes of config data */ 0); + + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + error0 = IP6_ERROR_NONE; + + /* Version != 6? Drop it. */ + error0 = (clib_net_to_host_u32 (ip0->ip_version_traffic_class_and_flow_label) >> 28) != 6 ? IP6_ERROR_VERSION : error0; + + /* hop limit < 1? Drop it. for link-local broadcast packets, + * like dhcpv6 packets from client has hop-limit 1, which should not + * be dropped. + */ + error0 = ip0->hop_limit <= 1 ? IP6_ERROR_TIME_EXPIRED : error0; + + /* L2 length must be at least minimal IP header. 
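A fixed ip6 header is 40 octets.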
*/ + error0 = p0->current_length < sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0; + + if (PREDICT_FALSE(error0 != IP6_ERROR_NONE)) + { + next0 = (error0 == IP6_ERROR_TIME_EXPIRED) ? + IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP; + } + p0->error = error_node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static char * ip6_error_strings[] = { +#define _(sym,string) string, + foreach_ip6_error +#undef _ +}; + +VLIB_REGISTER_NODE (ip6_input_node) = { + .function = ip6_input, + .name = "ip6-input", + .vector_size = sizeof (u32), + + .n_errors = IP6_N_ERROR, + .error_strings = ip6_error_strings, + + .n_next_nodes = IP6_INPUT_N_NEXT, + .next_nodes = { + [IP6_INPUT_NEXT_DROP] = "error-drop", + [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup", + [IP6_INPUT_NEXT_TTL_EXPIRE] = "ip6-icmp-ttl-expire", + }, + + .format_buffer = format_ip6_header, + .format_trace = format_ip6_input_trace, +}; + +static clib_error_t * ip6_init (vlib_main_t * vm) +{ + ethernet_register_input_type (vm, ETHERNET_TYPE_IP6, + ip6_input_node.index); + ppp_register_input_protocol (vm, PPP_PROTOCOL_ip6, + ip6_input_node.index); + hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip6, + ip6_input_node.index); + + { + pg_node_t * pn; + pn = pg_get_node (ip6_input_node.index); + pn->unformat_edit = unformat_pg_ip6_header; + } + + /* Set flow hash to something non-zero. */ + ip6_main.flow_hash_seed = 0xdeadbeef; + + /* Default hop limit for packets we generate. */ + ip6_main.host_config.ttl = 64; + + return /* no error */ 0; +} + +VLIB_INIT_FUNCTION (ip6_init); diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c new file mode 100644 index 00000000000..28f964c804f --- /dev/null +++ b/vnet/vnet/ip/ip6_neighbor.c @@ -0,0 +1,3146 @@ +/* + * ip/ip6_neighbor.c: IP6 neighbor handling + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/mhash.h> +#include <vppinfra/md5.h> + +#if DPDK==1 +#include <vnet/devices/dpdk/dpdk.h> +#endif + +typedef struct { + ip6_address_t ip6_address; + u32 sw_if_index; + u32 pad; +} ip6_neighbor_key_t; + +/* can't use sizeof link_layer_address, that's 8 */ +#define ETHER_MAC_ADDR_LEN 6 + +typedef struct { + ip6_neighbor_key_t key; + u8 link_layer_address[8]; + u64 cpu_time_last_updated; +} ip6_neighbor_t; + +/* advertised prefix option */ +typedef struct { + /* basic advertised information */ + ip6_address_t prefix; + u8 prefix_len; + int adv_on_link_flag; + int adv_autonomous_flag; + u32 adv_valid_lifetime_in_secs; + u32 adv_pref_lifetime_in_secs; + + /* advertised values are computed from these times if decrementing */ + f64 valid_lifetime_expires; + f64 pref_lifetime_expires; + + /* local information */ + int enabled; + int deprecated_prefix_flag; + int decrement_lifetime_flag; + +#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */ +#define DEF_ADV_VALID_LIFETIME 2592000 +#define DEF_ADV_PREF_LIFETIME 604800 + + /* extensions are added here, mobile, DNS etc.. */ +} ip6_radv_prefix_t; + + +typedef struct { + /* group information */ + u8 type; + ip6_address_t mcast_address; + u16 num_sources; + ip6_address_t *mcast_source_address_pool; +} ip6_mldp_group_t; + +/* configured router advertisement information per ipv6 interface */ +typedef struct { + + /* advertised config information, zero means unspecified */ + u8 curr_hop_limit; + int adv_managed_flag; + int adv_other_flag; + u16 adv_router_lifetime_in_sec; + u32 adv_neighbor_reachable_time_in_msec; + u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations; + + /* mtu option */ + u32 adv_link_mtu; + + /* source link layer option */ + u8 link_layer_address[8]; + u8 link_layer_addr_len; + + /* prefix option */ + ip6_radv_prefix_t * adv_prefixes_pool; + + /* Hash table mapping address to index in interface advertised prefix pool. */ + mhash_t address_to_prefix_index; + + /* MLDP group information */ + ip6_mldp_group_t * mldp_group_pool; + + /* Hash table mapping address to index in mldp address pool. 
*/ + mhash_t address_to_mldp_index; + + /* local information */ + u32 sw_if_index; + u32 fib_index; + int send_radv; /* radv on/off on this interface - set by config */ + int cease_radv; /* we are ceasing to send - set byf config */ + int send_unicast; + int adv_link_layer_address; + int prefix_option; + int failed_device_check; + int all_routers_mcast; + u32 seed; + u64 randomizer; + int ref_count; + u32 all_nodes_adj_index; + u32 all_routers_adj_index; + u32 all_mldv2_routers_adj_index; + + /* timing information */ +#define DEF_MAX_RADV_INTERVAL 200 +#define DEF_MIN_RADV_INTERVAL .75 * DEF_MAX_RADV_INTERVAL +#define DEF_CURR_HOP_LIMIT 64 +#define DEF_DEF_RTR_LIFETIME 3 * DEF_MAX_RADV_INTERVAL +#define MAX_DEF_RTR_LIFETIME 9000 + +#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16 /* seconds */ +#define MAX_INITIAL_RTR_ADVERTISEMENTS 3 /*transmissions */ +#define MIN_DELAY_BETWEEN_RAS 3 /* seconds */ +#define MAX_DELAY_BETWEEN_RAS 1800 /* seconds */ +#define MAX_RA_DELAY_TIME .5 /* seconds */ + + f64 max_radv_interval; + f64 min_radv_interval; + f64 min_delay_between_radv; + f64 max_delay_between_radv; + f64 max_rtr_default_lifetime; + + f64 last_radv_time; + f64 last_multicast_time; + f64 next_multicast_time; + + + u32 initial_adverts_count; + f64 initial_adverts_interval; + u32 initial_adverts_sent; + + /* stats */ + u32 n_advertisements_sent; + u32 n_solicitations_rcvd; + u32 n_solicitations_dropped; + + /* Link local address to use (defaults to underlying physical for logical interfaces */ + ip6_address_t link_local_address; + u8 link_local_prefix_len; + +} ip6_radv_t; + +typedef struct { + u32 next_index; + uword node_index; + uword type_opaque; + uword data; +} pending_resolution_t; + + +typedef struct { + /* Hash tables mapping name to opcode. */ + uword * opcode_by_name; + + /* lite beer "glean" adjacency handling */ + mhash_t pending_resolutions_by_address; + pending_resolution_t * pending_resolutions; + + u32 * neighbor_input_next_index_by_hw_if_index; + + ip6_neighbor_t * neighbor_pool; + + mhash_t neighbor_index_by_key; + + u32 * if_radv_pool_index_by_sw_if_index; + + ip6_radv_t * if_radv_pool; + + /* Neighbor attack mitigation */ + u32 limit_neighbor_cache_size; + u32 neighbor_delete_rotor; + +} ip6_neighbor_main_t; + +static ip6_neighbor_main_t ip6_neighbor_main; + +static u8 * format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) +{ + vlib_main_t * vm = va_arg (*va, vlib_main_t *); + ip6_neighbor_t * n = va_arg (*va, ip6_neighbor_t *); + vnet_main_t * vnm = vnet_get_main(); + vnet_sw_interface_t * si; + + if (! n) + return format (s, "%=12s%=20s%=20s%=40s", "Time", "Address", "Link layer", "Interface"); + + si = vnet_get_sw_interface (vnm, n->key.sw_if_index); + s = format (s, "%=12U%=20U%=20U%=40U", + format_vlib_cpu_time, vm, n->cpu_time_last_updated, + format_ip6_address, &n->key.ip6_address, + format_ethernet_address, n->link_layer_address, + format_vnet_sw_interface_name, vnm, si); + + return s; +} + +static clib_error_t * +ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_neighbor_t * n; + + if (! 
(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + u32 i, * to_delete = 0; + + pool_foreach (n, nm->neighbor_pool, ({ + if (n->key.sw_if_index == sw_if_index) + vec_add1 (to_delete, n - nm->neighbor_pool); + })); + + for (i = 0; i < vec_len (to_delete); i++) + { + n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]); + mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); + pool_put (nm->neighbor_pool, n); + } + + vec_free (to_delete); + } + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_neighbor_sw_interface_up_down); + +static void unset_random_neighbor_entry (void) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + vnet_main_t * vnm = vnet_get_main(); + vlib_main_t * vm = vnm->vlib_main; + ip6_neighbor_t * e; + u32 index; + + index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor); + nm->neighbor_delete_rotor = index; + + /* Try again from elt 0, could happen if an intfc goes down */ + if (index == ~0) + { + index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor); + nm->neighbor_delete_rotor = index; + } + + /* Nothing left in the pool */ + if (index == ~0) + return; + + e = pool_elt_at_index (nm->neighbor_pool, index); + + vnet_unset_ip6_ethernet_neighbor (vm, e->key.sw_if_index, + &e->key.ip6_address, + e->link_layer_address, + ETHER_MAC_ADDR_LEN); +} + +typedef struct { + u8 is_add; + u8 pad; + u8 link_layer_address[6]; + u32 sw_if_index; + ip6_address_t addr; +} ip6_neighbor_set_unset_rpc_args_t; + +static void ip6_neighbor_set_unset_rpc_callback +( ip6_neighbor_set_unset_rpc_args_t * a); + +#if DPDK > 0 +static void set_unset_ip6_neighbor_rpc +(vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 *link_layer_addreess, + int is_add) +{ + ip6_neighbor_set_unset_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.is_add = is_add; + memcpy (&args.addr, a, sizeof (*a)); + memcpy (args.link_layer_address, link_layer_addreess, 6); + + vl_api_rpc_call_main_thread (ip6_neighbor_set_unset_rpc_callback, + (u8 *) &args, sizeof (args)); +} +#endif + +int +vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 * link_layer_address, + uword n_bytes_link_layer_address) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_neighbor_key_t k; + ip6_neighbor_t * n; + ip6_main_t * im = &ip6_main; + uword * p; + u32 next_index; + pending_resolution_t * pr; + +#if DPDK > 0 + if (os_get_cpu_number()) + { + set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, + 1 /* set new neighbor */); + return 0; + } +#endif + + k.sw_if_index = sw_if_index; + k.ip6_address = a[0]; + k.pad = 0; + + vlib_worker_thread_barrier_sync (vm); + + p = mhash_get (&nm->neighbor_index_by_key, &k); + if (p) + n = pool_elt_at_index (nm->neighbor_pool, p[0]); + else + { + ip6_add_del_route_args_t args; + ip_adjacency_t adj; + + memset (&adj, 0, sizeof(adj)); + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + adj.explicit_fib_index = ~0; + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_IP6, + sw_if_index, + ip6_rewrite_node.index, + link_layer_address, + &adj.rewrite_header, + sizeof (adj.rewrite_data)); + + args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index]; + args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_ADD | IP6_ROUTE_FLAG_NEIGHBOR; + args.dst_address = a[0]; + args.dst_address_length = 128; + args.adj_index = ~0; + args.add_adj = &adj; + args.n_add_adj = 1; + + ip6_add_del_route (im, &args); + pool_get 
(nm->neighbor_pool, n); + mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, + /* old value */ 0); + n->key = k; + } + + /* Update time stamp and ethernet address. */ + memcpy (n->link_layer_address, link_layer_address, n_bytes_link_layer_address); + n->cpu_time_last_updated = clib_cpu_time_now (); + + /* Customer(s) waiting for this address to be resolved? */ + p = mhash_get (&nm->pending_resolutions_by_address, a); + if (p == 0) + goto out; + + next_index = p[0]; + + while (next_index != (u32)~0) + { + pr = pool_elt_at_index (nm->pending_resolutions, next_index); + vlib_process_signal_event (vm, pr->node_index, + pr->type_opaque, + pr->data); + next_index = pr->next_index; + pool_put (nm->pending_resolutions, pr); + } + + mhash_unset (&nm->pending_resolutions_by_address, a, 0); + +out: + vlib_worker_thread_barrier_release(vm); + return 0; +} + +int +vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 * link_layer_address, + uword n_bytes_link_layer_address) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_neighbor_key_t k; + ip6_neighbor_t * n; + ip6_main_t * im = &ip6_main; + ip6_add_del_route_args_t args; + uword * p; + int rv = 0; + +#if DPDK > 0 + if (os_get_cpu_number()) + { + set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, + 0 /* unset */); + return 0; + } +#endif + + k.sw_if_index = sw_if_index; + k.ip6_address = a[0]; + k.pad = 0; + + vlib_worker_thread_barrier_sync (vm); + + p = mhash_get (&nm->neighbor_index_by_key, &k); + if (p == 0) + { + rv = -1; + goto out; + } + + n = pool_elt_at_index (nm->neighbor_pool, p[0]); + mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); + pool_put (nm->neighbor_pool, n); + + args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index]; + args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_DEL + | IP6_ROUTE_FLAG_NEIGHBOR; + args.dst_address = a[0]; + args.dst_address_length = 128; + args.adj_index = ~0; + args.add_adj = NULL; + args.n_add_adj = 0; + ip6_add_del_route (im, &args); + out: + vlib_worker_thread_barrier_release(vm); + return rv; +} + +static void ip6_neighbor_set_unset_rpc_callback +( ip6_neighbor_set_unset_rpc_args_t * a) +{ + vlib_main_t * vm = vlib_get_main(); + if (a->is_add) + vnet_set_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr, + a->link_layer_address, 6); + else + vnet_unset_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr, + a->link_layer_address, 6); +} + +static int +ip6_neighbor_sort (void *a1, void *a2) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_neighbor_t * n1 = a1, * n2 = a2; + int cmp; + cmp = vnet_sw_interface_compare (vnm, n1->key.sw_if_index, + n2->key.sw_if_index); + if (! cmp) + cmp = ip6_address_compare (&n1->key.ip6_address, &n2->key.ip6_address); + return cmp; +} + +static clib_error_t * +show_ip6_neighbors (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_neighbor_t * n, * ns; + clib_error_t * error = 0; + u32 sw_if_index; + + /* Filter entries by interface if given. 
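If no interface is supplied, sw_if_index stays ~0 and every entry in the neighbor pool is listed, sorted by interface and then by address.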
*/ + sw_if_index = ~0; + (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index); + + ns = 0; + pool_foreach (n, nm->neighbor_pool, ({ vec_add1 (ns, n[0]); })); + vec_sort_with_function (ns, ip6_neighbor_sort); + vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, 0); + vec_foreach (n, ns) { + if (sw_if_index != ~0 && n->key.sw_if_index != sw_if_index) + continue; + vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, n); + } + vec_free (ns); + + return error; +} + +VLIB_CLI_COMMAND (show_ip6_neighbors_command, static) = { + .path = "show ip6 neighbors", + .function = show_ip6_neighbors, + .short_help = "Show ip6 neighbors", +}; + +static clib_error_t * +set_ip6_neighbor (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_address_t addr; + u8 mac_address[6]; + int addr_valid = 0; + int is_del = 0; + u32 sw_if_index; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* intfc, ip6-address, mac-address */ + if (unformat (input, "%U %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &addr, + unformat_ethernet_address, mac_address)) + addr_valid = 1; + + else if (unformat (input, "delete") || unformat (input, "del")) + is_del = 1; + else + break; + } + + if (!addr_valid) + return clib_error_return (0, "Missing interface, ip6 or hw address"); + + if (!is_del) + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &addr, + mac_address, sizeof(mac_address)); + else + vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, &addr, + mac_address, sizeof(mac_address)); + return 0; +} + +VLIB_CLI_COMMAND (set_ip6_neighbor_command, static) = { + .path = "set ip6 neighbor", + .function = set_ip6_neighbor, + .short_help = "set ip6 neighbor [del] <intfc> <ip6-address> <mac-address>", +}; + +typedef enum { + ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP, + ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY, + ICMP6_NEIGHBOR_SOLICITATION_N_NEXT, +} icmp6_neighbor_solicitation_or_advertisement_next_t; + +static_always_inline uword +icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + uword is_solicitation) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent; + icmp6_neighbor_discovery_option_type_t option_type; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_icmp_input_node.index); + int bogus_length; + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + option_type = + (is_solicitation + ? 
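/* per RFC 4861 a solicitation carries the source link-layer address option, an advertisement the target link-layer address option */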
ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address + : ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address); + n_advertisements_sent = 0; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + icmp6_neighbor_solicitation_or_advertisement_header_t * h0; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t * o0; + u32 bi0, options_len0, sw_if_index0, next0, error0; + u32 ip6_sadd_link_local, ip6_sadd_unspecified; + int is_rewrite0; + u32 ni0; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + h0 = ip6_next_header (ip0); + options_len0 = clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); + + error0 = ICMP6_ERROR_NONE; + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + ip6_sadd_link_local = ip6_address_is_link_local_unicast(&ip0->src_address); + ip6_sadd_unspecified = ip6_address_is_unspecified (&ip0->src_address); + + /* Check that source address is unspecified, link-local or else on-link. */ + if (!ip6_sadd_unspecified && !ip6_sadd_link_local) + { + u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); + ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0); + + /* Allow all realistic-looking rewrite adjacencies to pass */ + ni0 = adj0->lookup_next_index; + is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) && + (ni0 < IP_LOOKUP_N_NEXT); + + error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 + || ! is_rewrite0) + ? ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK + : error0); + } + + o0 = (void *) (h0 + 1); + o0 = ((options_len0 == 8 && o0->header.type == option_type + && o0->header.n_data_u64s == 1) ? o0 : 0); + + /* If src address unspecified or link local, donot learn neighbor MAC */ + if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 && + !ip6_sadd_unspecified && !ip6_sadd_link_local)) + { + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + if (nm->limit_neighbor_cache_size && + pool_elts (nm->neighbor_pool) >= nm->limit_neighbor_cache_size) + unset_random_neighbor_entry(); + vnet_set_ip6_ethernet_neighbor ( + vm, sw_if_index0, + is_solicitation ? &ip0->src_address : &h0->target_address, + o0->ethernet_address, sizeof (o0->ethernet_address)); + } + + if (is_solicitation && error0 == ICMP6_ERROR_NONE) + { + /* Check that target address is one that we know about. */ + ip_interface_address_t * ia0; + ip6_address_fib_t ip6_af0; + void * oldheap; + + ip6_addr_fib_init (&ip6_af0, &h0->target_address, + vec_elt (im->fib_index_by_sw_if_index, + sw_if_index0)); + + /* Gross kludge, "thank you" MJ, don't even ask */ + oldheap = clib_mem_set_heap (clib_per_cpu_mheaps[0]); + ia0 = ip_get_interface_address (lm, &ip6_af0); + clib_mem_set_heap (oldheap); + error0 = ia0 == 0 ? + ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN : error0; + } + + if (is_solicitation) + next0 = (error0 != ICMP6_ERROR_NONE + ? ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP + : ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY); + else + { + next0 = 0; + error0 = error0 == ICMP6_ERROR_NONE ? 
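+ /* inbound advertisements terminate here: count them as received, then let the node's only next ("error-drop") consume the buffer */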
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_RX : error0; + } + + if (is_solicitation && error0 == ICMP6_ERROR_NONE) + { + vnet_sw_interface_t * sw_if0; + ethernet_interface_t * eth_if0; + ethernet_header_t *eth0; + + /* dst address is either source address or the all-nodes mcast addr */ + if(!ip6_sadd_unspecified) + ip0->dst_address = ip0->src_address; + else + ip6_set_reserved_multicast_address(&ip0->dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + + ip0->src_address = h0->target_address; + ip0->hop_limit = 255; + h0->icmp.type = ICMP6_neighbor_advertisement; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0); + ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + if (eth_if0 && o0) + { + memcpy (o0->ethernet_address, eth_if0->address, 6); + o0->header.type = + ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address; + } + + h0->advertisement_flags = clib_host_to_net_u32 + (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED + | ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE); + + h0->icmp.checksum = 0; + h0->icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, + &bogus_length); + ASSERT(bogus_length == 0); + + /* Reuse current MAC header, copy SMAC to DMAC and + * interface MAC to SMAC */ + vlib_buffer_reset (p0); + eth0 = vlib_buffer_get_current(p0); + memcpy(eth0->dst_address, eth0->src_address, 6); + memcpy(eth0->src_address, eth_if0->address, 6); + + /* Setup input and output sw_if_index for packet */ + ASSERT(vnet_buffer(p0)->sw_if_index[VLIB_RX] == sw_if_index0); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = sw_if_index0; + vnet_buffer(p0)->sw_if_index[VLIB_RX] = + vnet_main.local_interface_sw_if_index; + + n_advertisements_sent++; + } + + p0->error = error_node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Account for advertisements sent. */ + vlib_error_count (vm, error_node->node_index, ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, n_advertisements_sent); + + return frame->n_vectors; +} + +/* for "syslogging" - use elog for now */ +#define foreach_log_level \ + _ (DEBUG, "DEBUG") \ + _ (INFO, "INFORMATION") \ + _ (NOTICE, "NOTICE") \ + _ (WARNING, "WARNING") \ + _ (ERR, "ERROR") \ + _ (CRIT, "CRITICAL") \ + _ (ALERT, "ALERT") \ + _ (EMERG, "EMERGENCY") + +typedef enum { +#define _(f,s) LOG_##f, + foreach_log_level +#undef _ +} log_level_t; + +static char * log_level_strings[] = { +#define _(f,s) s, + foreach_log_level +#undef _ +}; + +static int logmask = 1 << LOG_DEBUG; + +static void +ip6_neighbor_syslog(vlib_main_t *vm, int priority, char * fmt, ...) 
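+ /* stand-in for syslog: only levels enabled in logmask are kept, and they are written to the vlib event log rather than to a syslog daemon */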
+{ + /* just use elog for now */ + u8 *what; + va_list va; + + if( (priority > LOG_EMERG) || + !(logmask & (1 << priority))) + return; + + va_start (va, fmt); + if(fmt) + { + what = va_format (0, fmt, &va); + + ELOG_TYPE_DECLARE (e) = { + .format = "ip6 nd: (%s): %s", + .format_args = "T4T4", + }; + struct { u32 s[2]; } * ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->s[0] = elog_string(&vm->elog_main, log_level_strings[priority]); + ed->s[1] = elog_string(&vm->elog_main, (char *)what); + } + va_end (va); + return; +} + +/* ipv6 neighbor discovery - router advertisements */ +typedef enum { + ICMP6_ROUTER_SOLICITATION_NEXT_DROP, + ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW, + ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX, + ICMP6_ROUTER_SOLICITATION_N_NEXT, +} icmp6_router_solicitation_or_advertisement_next_t; + +static_always_inline uword +icmp6_router_solicitation(vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next_index; + u32 n_advertisements_sent = 0; + int bogus_length; + + icmp6_neighbor_discovery_option_type_t option_type; + + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_icmp_input_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + /* source may append his LL address */ + option_type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + ip6_radv_t *radv_info = 0; + + icmp6_neighbor_discovery_header_t * h0; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t * o0; + + u32 bi0, options_len0, sw_if_index0, next0, error0; + u32 is_solicitation = 1, is_dropped = 0; + u32 is_unspecified, is_link_local; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + h0 = ip6_next_header (ip0); + options_len0 = clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); + is_unspecified = ip6_address_is_unspecified (&ip0->src_address); + is_link_local = ip6_address_is_link_local_unicast (&ip0->src_address); + + error0 = ICMP6_ERROR_NONE; + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + /* check if solicitation (not from nd_timer node) */ + if (ip6_address_is_unspecified (&ip0->dst_address)) + is_solicitation = 0; + + /* Check that source address is unspecified, link-local or else on-link. */ + if (!is_unspecified && !is_link_local) + { + u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); + ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0); + + error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 + || (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP + && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)) + ? 
ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK + : error0); + } + + /* check for source LL option and process */ + o0 = (void *) (h0 + 1); + o0 = ((options_len0 == 8 + && o0->header.type == option_type + && o0->header.n_data_u64s == 1) + ? o0 + : 0); + + /* if src address unspecified IGNORE any options */ + if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 && + !is_unspecified && !is_link_local)) { + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + if (nm->limit_neighbor_cache_size && + pool_elts (nm->neighbor_pool) >= nm->limit_neighbor_cache_size) + unset_random_neighbor_entry(); + + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0, + &ip0->src_address, + o0->ethernet_address, + sizeof (o0->ethernet_address)); + } + + /* default is to drop */ + next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP; + + if (error0 == ICMP6_ERROR_NONE) + { + vnet_sw_interface_t * sw_if0; + ethernet_interface_t * eth_if0; + u32 adj_index0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0); + ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + + /* only support ethernet interface type for now */ + error0 = (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF : error0; + + if (error0 == ICMP6_ERROR_NONE) + { + u32 ri; + + /* adjust the sizeof the buffer to just include the ipv6 header */ + p0->current_length -= (options_len0 + sizeof(icmp6_neighbor_discovery_header_t)); + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0]; + + if(ri != ~0) + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + error0 = ((!radv_info) ? ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG : error0); + + if (error0 == ICMP6_ERROR_NONE) + { + f64 now = vlib_time_now (vm); + + /* for solicited adverts - need to rate limit */ + if(is_solicitation) + { + if( (now - radv_info->last_radv_time) < MIN_DELAY_BETWEEN_RAS ) + is_dropped = 1; + else + radv_info->last_radv_time = now; + } + + /* send now */ + icmp6_router_advertisement_header_t rh; + + rh.icmp.type = ICMP6_router_advertisement; + rh.icmp.code = 0; + rh.icmp.checksum = 0; + + rh.current_hop_limit = radv_info->curr_hop_limit; + rh.router_lifetime_in_sec = clib_host_to_net_u16(radv_info->adv_router_lifetime_in_sec); + rh.time_in_msec_between_retransmitted_neighbor_solicitations = + clib_host_to_net_u32(radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations); + rh.neighbor_reachable_time_in_msec = + clib_host_to_net_u32(radv_info->adv_neighbor_reachable_time_in_msec); + + rh.flags = (radv_info->adv_managed_flag) ? ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP : 0; + rh.flags |= ( (radv_info->adv_other_flag) ? 
ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP : 0); + + + u16 payload_length = sizeof(icmp6_router_advertisement_header_t); + + vlib_buffer_add_data (vm, + p0->free_list_index, + bi0, + (void *)&rh, sizeof(icmp6_router_advertisement_header_t)); + + if(radv_info->adv_link_layer_address) + { + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t h; + + h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address; + h.header.n_data_u64s = 1; + + /* copy ll address */ + memcpy(&h.ethernet_address[0], eth_if0->address, 6); + + vlib_buffer_add_data (vm, + p0->free_list_index, + bi0, + (void *)&h, sizeof(icmp6_neighbor_discovery_ethernet_link_layer_address_option_t)); + + payload_length += sizeof(icmp6_neighbor_discovery_ethernet_link_layer_address_option_t); + } + + /* add MTU option */ + if(radv_info->adv_link_mtu) + { + icmp6_neighbor_discovery_mtu_option_t h; + + h.unused = 0; + h.mtu = clib_host_to_net_u32(radv_info->adv_link_mtu); + h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu; + h.header.n_data_u64s = 1; + + payload_length += sizeof( icmp6_neighbor_discovery_mtu_option_t); + + vlib_buffer_add_data (vm, + p0->free_list_index, + bi0, + (void *)&h, sizeof(icmp6_neighbor_discovery_mtu_option_t)); + } + + /* add advertised prefix options */ + ip6_radv_prefix_t *pr_info; + + pool_foreach (pr_info, radv_info->adv_prefixes_pool, ({ + + if(pr_info->enabled && + (!pr_info->decrement_lifetime_flag || (pr_info->pref_lifetime_expires >0))) + { + /* advertise this prefix */ + icmp6_neighbor_discovery_prefix_information_option_t h; + + h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information; + h.header.n_data_u64s = (sizeof(icmp6_neighbor_discovery_prefix_information_option_t) >> 3); + + h.dst_address_length = pr_info->prefix_len; + + h.flags = (pr_info->adv_on_link_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK : 0; + h.flags |= (pr_info->adv_autonomous_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO : 0; + + if(radv_info->cease_radv && pr_info->deprecated_prefix_flag) + { + h.valid_time = clib_host_to_net_u32(MIN_ADV_VALID_LIFETIME); + h.preferred_time = 0; + } + else + { + if(pr_info->decrement_lifetime_flag) + { + pr_info->adv_valid_lifetime_in_secs = ((pr_info->valid_lifetime_expires > now)) ? + (pr_info->valid_lifetime_expires - now) : 0; + + pr_info->adv_pref_lifetime_in_secs = ((pr_info->pref_lifetime_expires > now)) ? + (pr_info->pref_lifetime_expires - now) : 0; + } + + h.valid_time = clib_host_to_net_u32(pr_info->adv_valid_lifetime_in_secs); + h.preferred_time = clib_host_to_net_u32(pr_info->adv_pref_lifetime_in_secs) ; + } + h.unused = 0; + + memcpy(&h.dst_address, &pr_info->prefix, sizeof(ip6_address_t)); + + payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t); + + vlib_buffer_add_data (vm, + p0->free_list_index, + bi0, + (void *)&h, sizeof(icmp6_neighbor_discovery_prefix_information_option_t)); + + } + })); + + /* add additional options before here */ + + /* finish building the router advertisement... 
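pick the destination (unicast back to the solicitor when send-unicast is configured, otherwise the all-nodes multicast group), source it from the interface link-local address with hop limit 255, recompute the checksum, then hand solicited replies to interface-output and periodic ones to ip6-rewrite-local via the all-nodes adjacency.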
*/ + if(!is_unspecified && radv_info->send_unicast) + { + ip0->dst_address = ip0->src_address; + } + else + { + /* target address is all-nodes mcast addr */ + ip6_set_reserved_multicast_address(&ip0->dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + } + + /* source address MUST be the link-local address */ + ip0->src_address = radv_info->link_local_address; + + ip0->hop_limit = 255; + ip0->payload_length = clib_host_to_net_u16 (payload_length); + + icmp6_router_advertisement_header_t * rh0 = (icmp6_router_advertisement_header_t *)(ip0 + 1); + rh0->icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, + &bogus_length); + ASSERT(bogus_length == 0); + + /* setup output if and adjacency */ + vnet_buffer (p0)->sw_if_index[VLIB_RX] = + vnet_main.local_interface_sw_if_index; + + if (is_solicitation) + { + ethernet_header_t *eth0; + /* Reuse current MAC header, copy SMAC to DMAC and + * interface MAC to SMAC */ + vlib_buffer_reset (p0); + eth0 = vlib_buffer_get_current(p0); + memcpy(eth0->dst_address, eth0->src_address, 6); + memcpy(eth0->src_address, eth_if0->address, 6); + next0 = is_dropped ? + next0 : ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX; + vnet_buffer(p0)->sw_if_index[VLIB_TX] = sw_if_index0; + } + else + { + adj_index0 = radv_info->all_nodes_adj_index; + if (adj_index0 == 0) + error0 = ICMP6_ERROR_DST_LOOKUP_MISS; + else + { + ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, adj_index0); + error0 = + ((adj0->rewrite_header.sw_if_index != sw_if_index0 + || adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) + ? ICMP6_ERROR_ROUTER_SOLICITATION_DEST_UNKNOWN + : error0); + next0 = is_dropped ? + next0 : ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW; + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0; + } + } + + radv_info->n_solicitations_dropped += is_dropped; + radv_info->n_solicitations_rcvd += is_solicitation; + + if((error0 == ICMP6_ERROR_NONE) && !is_dropped) + { + radv_info->n_advertisements_sent++; + n_advertisements_sent++; + } + } + } + } + + p0->error = error_node->errors[error0]; + + if(error0 != ICMP6_ERROR_NONE) + vlib_error_count (vm, error_node->node_index, error0, 1); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Account for router advertisements sent. 
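Only adverts that were actually emitted are counted; rate-limited or errored solicitations are excluded.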
*/ + vlib_error_count (vm, error_node->node_index, ICMP6_ERROR_ROUTER_ADVERTISEMENTS_TX, n_advertisements_sent); + + return frame->n_vectors; +} + + /* validate advertised info for consistancy (see RFC-4861 section 6.2.7) - log any inconsistencies, packet will always be dropped */ +static_always_inline uword +icmp6_router_advertisement(vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next_index; + u32 n_advertisements_rcvd = 0; + + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_icmp_input_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip0; + ip6_radv_t *radv_info = 0; + icmp6_router_advertisement_header_t * h0; + u32 bi0, options_len0, sw_if_index0, next0, error0; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + h0 = ip6_next_header (ip0); + options_len0 = clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); + + error0 = ICMP6_ERROR_NONE; + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + /* Check that source address is link-local*/ + error0 = (!ip6_address_is_link_local_unicast (&ip0->src_address)) ? + ICMP6_ERROR_ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL : error0; + + /* default is to drop */ + next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP; + + n_advertisements_rcvd++; + + if (error0 == ICMP6_ERROR_NONE) + { + vnet_sw_interface_t * sw_if0; + ethernet_interface_t * eth_if0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0); + ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + + /* only support ethernet interface type for now */ + error0 = (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF : error0; + + if (error0 == ICMP6_ERROR_NONE) + { + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0]; + + if(ri != ~0) + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + error0 = ((!radv_info) ? 
ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG : error0); + + if (error0 == ICMP6_ERROR_NONE) + { + /* validate advertised information */ + if((h0->current_hop_limit && radv_info->curr_hop_limit) && + (h0->current_hop_limit != radv_info->curr_hop_limit)) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our AdvCurHopLimit on %U doesn't agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + } + + if((h0->flags & ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP) != + radv_info->adv_managed_flag) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our AdvManagedFlag on %U doesn't agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + } + + if((h0->flags & ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP) != + radv_info->adv_other_flag) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our AdvOtherConfigFlag on %U doesn't agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + } + + if((h0->time_in_msec_between_retransmitted_neighbor_solicitations && + radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations) && + (h0->time_in_msec_between_retransmitted_neighbor_solicitations != + clib_host_to_net_u32(radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations))) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our AdvRetransTimer on %U doesn't agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + } + + if((h0->neighbor_reachable_time_in_msec && + radv_info->adv_neighbor_reachable_time_in_msec) && + (h0->neighbor_reachable_time_in_msec != + clib_host_to_net_u32(radv_info->adv_neighbor_reachable_time_in_msec))) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our AdvReachableTime on %U doesn't agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + } + + /* check for MTU or prefix options or .. 
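walk the option TLVs that follow the RA header: each option header carries a type and a length in units of 8 octets (n_data_u64s); bail out on truncated, zero-length or overlong options, then cross-check any MTU and prefix-information options against the values advertised locally.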
*/ + u8 * opt_hdr = (u8 *)(h0 + 1); + while( options_len0 > 0) + { + icmp6_neighbor_discovery_option_header_t *o0 = ( icmp6_neighbor_discovery_option_header_t *)opt_hdr; + int opt_len = o0->n_data_u64s << 3; + icmp6_neighbor_discovery_option_type_t option_type = o0->type; + + if(options_len0 < 2) + { + ip6_neighbor_syslog(vm, LOG_ERR, + "malformed RA packet on %U from %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + break; + } + + if(opt_len == 0) + { + ip6_neighbor_syslog(vm, LOG_ERR, + " zero length option in RA on %U from %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + break; + } + else if( opt_len > options_len0) + { + ip6_neighbor_syslog(vm, LOG_ERR, + "option length in RA packet greater than total length on %U from %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + break; + } + + options_len0 -= opt_len; + opt_hdr += opt_len; + + switch(option_type) + { + case ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu: + { + icmp6_neighbor_discovery_mtu_option_t *h = + (icmp6_neighbor_discovery_mtu_option_t *)(o0); + + if(opt_len < sizeof(*h)) + break; + + if((h->mtu && radv_info->adv_link_mtu) && + (h->mtu != clib_host_to_net_u32(radv_info->adv_link_mtu))) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our AdvLinkMTU on %U doesn't agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address); + } + } + break; + + case ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information: + { + icmp6_neighbor_discovery_prefix_information_option_t *h = + (icmp6_neighbor_discovery_prefix_information_option_t *)(o0); + + /* validate advertised prefix options */ + ip6_radv_prefix_t *pr_info; + u32 preferred, valid; + + if(opt_len < sizeof(*h)) + break; + + preferred = clib_net_to_host_u32(h->preferred_time); + valid = clib_net_to_host_u32(h->valid_time); + + /* look for matching prefix - if we our advertising it, it better be consistant */ + pool_foreach (pr_info, radv_info->adv_prefixes_pool, ({ + + ip6_address_t mask; + ip6_address_mask_from_width(&mask, pr_info->prefix_len); + + if(pr_info->enabled && + (pr_info->prefix_len == h->dst_address_length) && + ip6_address_is_equal_masked (&pr_info->prefix, &h->dst_address, &mask)) + { + /* found it */ + if(!pr_info->decrement_lifetime_flag && + valid != pr_info->adv_valid_lifetime_in_secs) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our ADV validlifetime on %U for %U does not agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix, + format_ip6_address, &h->dst_address); + } + if(!pr_info->decrement_lifetime_flag && + preferred != pr_info->adv_pref_lifetime_in_secs) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our ADV preferredlifetime on %U for %U does not agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix, + format_ip6_address, &h->dst_address); + } + } + break; + })); + break; + } + default: + /* skip this one */ + break; + } + } + } + } + } + + p0->error = error_node->errors[error0]; + + if(error0 != ICMP6_ERROR_NONE) + vlib_error_count (vm, error_node->node_index, error0, 1); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Account for router advertisements sent. 
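(strictly, received: this is the RA input path, so the count goes to the ROUTER_ADVERTISEMENTS_RX counter).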
*/ + vlib_error_count (vm, error_node->node_index, ICMP6_ERROR_ROUTER_ADVERTISEMENTS_RX, n_advertisements_rcvd); + + return frame->n_vectors; +} + +/* create and initialize router advertisement parameters with default values for this intfc */ +static u32 +ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + ip6_main_t * im = &ip6_main; + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip6_radv_t * a= 0; + u32 ri = ~0;; + vnet_sw_interface_t * sw_if0; + ethernet_interface_t * eth_if0 = 0; + + /* lookup radv container - ethernet interfaces only */ + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index); + if(sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + + if(!eth_if0) + return ri; + + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if(ri != ~0) + { + a = pool_elt_at_index (nm->if_radv_pool, ri); + + if(!is_add) + { + u32 i, * to_delete = 0; + ip6_radv_prefix_t *p; + ip6_mldp_group_t *m; + + /* remove adjacencies */ + ip_del_adjacency (lm, a->all_nodes_adj_index); + ip_del_adjacency (lm, a->all_routers_adj_index); + ip_del_adjacency (lm, a->all_mldv2_routers_adj_index); + + /* clean up prefix_pool */ + pool_foreach (p, a->adv_prefixes_pool, ({ + vec_add1 (to_delete, p - a->adv_prefixes_pool); + })); + + for (i = 0; i < vec_len (to_delete); i++) + { + p = pool_elt_at_index (a->adv_prefixes_pool, to_delete[i]); + mhash_unset (&a->address_to_prefix_index, &p->prefix, 0); + pool_put (a->adv_prefixes_pool, p); + } + + vec_free (to_delete); + to_delete = 0; + + /* clean up mldp group pool */ + pool_foreach (m, a->mldp_group_pool, ({ + vec_add1 (to_delete, m - a->mldp_group_pool); + })); + + for (i = 0; i < vec_len (to_delete); i++) + { + m = pool_elt_at_index (a->mldp_group_pool, to_delete[i]); + mhash_unset (&a->address_to_mldp_index, &m->mcast_address, 0); + pool_put (a->mldp_group_pool, m); + } + + vec_free (to_delete); + + pool_put (nm->if_radv_pool, a); + nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0; + ri = ~0; + } + } + else + { + if(is_add) + { + vnet_hw_interface_t * hw_if0; + + hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index); + + pool_get (nm->if_radv_pool, a); + + ri = a - nm->if_radv_pool; + nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ri; + + /* initialize default values (most of which are zero) */ + memset (a, 0, sizeof (a[0])); + + a->sw_if_index = sw_if_index; + a->fib_index = ~0; + a->max_radv_interval = DEF_MAX_RADV_INTERVAL; + a->min_radv_interval = DEF_MIN_RADV_INTERVAL; + a->curr_hop_limit = DEF_CURR_HOP_LIMIT; + a->adv_router_lifetime_in_sec = DEF_DEF_RTR_LIFETIME; + + a->adv_link_layer_address = 1; /* send ll address source address option */ + + a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS; + a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS; + a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME; + a->seed = random_default_seed(); + + /* for generating random interface ids */ + a->randomizer = 0x1119194911191949; + a->randomizer = random_u64 ((u32 *)&a->randomizer); + + a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS ; + a->initial_adverts_sent = a->initial_adverts_count-1; + a->initial_adverts_interval = MAX_INITIAL_RTR_ADVERT_INTERVAL; + + /* deafult is to send */ + a->send_radv = 1; + + /* fill in radv_info for this interface that will be needed later */ + a->adv_link_mtu = 
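/* advertised link MTU is seeded from the hardware interface's max L3 packet size */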
hw_if0->max_l3_packet_bytes[VLIB_RX]; + + memcpy (a->link_layer_address, eth_if0->address, 6); + + /* fill in default link-local address (this may be overridden) */ + ip6_link_local_address_from_ethernet_address (&a->link_local_address, eth_if0->address); + a->link_local_prefix_len = 64; + + mhash_init (&a->address_to_prefix_index, sizeof (uword), sizeof (ip6_address_t)); + mhash_init (&a->address_to_mldp_index, sizeof (uword), sizeof (ip6_address_t)); + + { + ip_adjacency_t *adj; + u8 link_layer_address[6] = + {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_all_hosts}; + + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &a->all_nodes_adj_index); + + adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + adj->if_address_index = ~0; + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_IP6, + sw_if_index, + ip6_rewrite_node.index, + link_layer_address, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); + } + + { + ip_adjacency_t *adj; + u8 link_layer_address[6] = + {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_all_routers}; + + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &a->all_routers_adj_index); + + adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + adj->if_address_index = ~0; + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_IP6, + sw_if_index, + ip6_rewrite_node.index, + link_layer_address, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); + } + + { + ip_adjacency_t *adj; + u8 link_layer_address[6] = + {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_mldv2_routers}; + + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &a->all_mldv2_routers_adj_index); + + adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + adj->if_address_index = ~0; + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_IP6, + sw_if_index, + ip6_rewrite_node.index, + link_layer_address, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); + } + + /* add multicast groups we will always be reporting */ + ip6_address_t addr; + ip6_mldp_group_t *mcast_group_info; + + ip6_set_reserved_multicast_address (&addr, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + + /* lookup mldp info for this interface */ + + uword * p = mhash_get (&a->address_to_mldp_index, &addr); + mcast_group_info = p ? pool_elt_at_index (a->mldp_group_pool, p[0]) : 0; + + /* add address */ + if(!mcast_group_info) + { + /* add */ + u32 mi; + pool_get (a->mldp_group_pool, mcast_group_info); + + mi = mcast_group_info - a->mldp_group_pool; + mhash_set (&a->address_to_mldp_index, &addr, mi, /* old_value */ 0); + + mcast_group_info->type = 4; + mcast_group_info->mcast_source_address_pool = 0; + mcast_group_info->num_sources = 0; + memcpy(&mcast_group_info->mcast_address, &addr, sizeof(ip6_address_t)); + } + + ip6_set_reserved_multicast_address (&addr, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_routers); + + p = mhash_get (&a->address_to_mldp_index, &addr); + mcast_group_info = p ? 
pool_elt_at_index (a->mldp_group_pool, p[0]) : 0; + + if(!mcast_group_info) + { + /* add */ + u32 mi; + pool_get (a->mldp_group_pool, mcast_group_info); + + mi = mcast_group_info - a->mldp_group_pool; + mhash_set (&a->address_to_mldp_index, &addr, mi, /* old_value */ 0); + + mcast_group_info->type = 4; + mcast_group_info->mcast_source_address_pool = 0; + mcast_group_info->num_sources = 0; + memcpy(&mcast_group_info->mcast_address, &addr, sizeof(ip6_address_t)); + } + + ip6_set_reserved_multicast_address (&addr, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + + p = mhash_get (&a->address_to_mldp_index, &addr); + mcast_group_info = p ? pool_elt_at_index (a->mldp_group_pool, p[0]) : 0; + + if(!mcast_group_info) + { + /* add */ + u32 mi; + pool_get (a->mldp_group_pool, mcast_group_info); + + mi = mcast_group_info - a->mldp_group_pool; + mhash_set (&a->address_to_mldp_index, &addr, mi, /* old_value */ 0); + + mcast_group_info->type = 4; + mcast_group_info->mcast_source_address_pool = 0; + mcast_group_info->num_sources = 0; + memcpy(&mcast_group_info->mcast_address, &addr, sizeof(ip6_address_t)); + } + } + } + return ri; +} + +/* send an mldpv2 report */ +static void +ip6_neighbor_send_mldpv2_report(u32 sw_if_index) +{ + vnet_main_t * vnm = vnet_get_main(); + vlib_main_t * vm = vnm->vlib_main; + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + vnet_sw_interface_t * sw_if0; + ethernet_interface_t * eth_if0; + u32 ri; + int bogus_length; + + ip6_radv_t *radv_info; + u16 payload_length; + vlib_buffer_t * b0; + ip6_header_t * ip0; + u32 * to_next; + vlib_frame_t * f; + u32 bo0; + u32 n_to_alloc = 1; + u32 n_allocated; + + icmp6_multicast_listener_report_header_t *rh0; + icmp6_multicast_listener_report_packet_t *rp0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index); + ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + + if (!eth_if0 || !vnet_sw_interface_is_admin_up (vnm, sw_if_index)) + return; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if(ri == ~0) + return; + + /* send report now - build a mldpv2 report packet */ + n_allocated = vlib_buffer_alloc_from_free_list(vm, + &bo0, + n_to_alloc, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + if (PREDICT_FALSE(n_allocated == 0)) + { + clib_warning ("buffer allocation failure"); + return; + } + + b0 = vlib_get_buffer (vm, bo0); + + /* adjust the sizeof the buffer to just include the ipv6 header */ + b0->current_length = sizeof(icmp6_multicast_listener_report_packet_t); + + payload_length = sizeof(icmp6_multicast_listener_report_header_t); + + b0->error = ICMP6_ERROR_NONE; + + rp0 = vlib_buffer_get_current (b0); + ip0 = (ip6_header_t *)&rp0-> ip; + rh0 = (icmp6_multicast_listener_report_header_t *)&rp0-> report_hdr; + + memset (rp0 , 0x0, sizeof (icmp6_multicast_listener_report_packet_t)); + + ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28); + + ip0->protocol = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS; + /* for DEBUG - vnet driver won't seem to emit router alerts */ + /* ip0->protocol = IP_PROTOCOL_ICMP6; */ + ip0->hop_limit = 1; + + rh0->icmp.type = ICMP6_multicast_listener_report_v2; + + /* source address MUST be the link-local address */ + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + ip0->src_address = radv_info->link_local_address; + + /* 
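as required by MLDv2 (RFC 3810), the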
destination is all mldpv2 routers */ + ip6_set_reserved_multicast_address(&ip0->dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + + /* add reports here */ + ip6_mldp_group_t *m; + int num_addr_records = 0; + icmp6_multicast_address_record_t rr; + + /* fill in the hop-by-hop extension header (router alert) info */ + rh0->ext_hdr.next_hdr = IP_PROTOCOL_ICMP6; + rh0->ext_hdr.n_data_u64s = 0; + + rh0->alert.type = IP6_MLDP_ALERT_TYPE; + rh0->alert.len = 2; + rh0->alert.value = 0; + + rh0->pad.type = 1; + rh0->pad.len = 0; + + rh0->icmp.checksum = 0; + + pool_foreach (m, radv_info->mldp_group_pool, ({ + + rr.type = m->type; + rr.aux_data_len_u32s = 0; + rr.num_sources = clib_host_to_net_u16 (m->num_sources); + memcpy(&rr.mcast_addr, &m->mcast_address, sizeof(ip6_address_t)); + + num_addr_records++; + + vlib_buffer_add_data (vm, + b0->free_list_index, + bo0, + (void *)&rr, sizeof(icmp6_multicast_address_record_t)); + + payload_length += sizeof( icmp6_multicast_address_record_t); + })); + + rh0->rsvd = 0; + rh0->num_addr_records = clib_host_to_net_u16(num_addr_records); + + /* update lengths */ + ip0->payload_length = clib_host_to_net_u16 (payload_length); + + rh0->icmp.checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, + &bogus_length); + ASSERT(bogus_length == 0); + + /* + * OK to override w/ no regard for actual FIB, because + * ip6-rewrite-local only looks at the adjacency. + */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + vnet_main.local_interface_sw_if_index; + + vnet_buffer (b0)->ip.adj_index[VLIB_RX] = + radv_info->all_mldv2_routers_adj_index; + + vlib_node_t * node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-local"); + + f = vlib_get_frame_to_node (vm, node->index); + to_next = vlib_frame_vector_args (f); + to_next[0] = bo0; + f->n_vectors = 1; + + vlib_put_frame_to_node (vm, node->index, f); + return; +} + +VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = { + .function = icmp6_router_solicitation, + .name = "icmp6-router-solicitation", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT, + .next_nodes = { + [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop", + [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-local", + [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output", + }, +}; + +/* send a RA or update the timer info etc.. 
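Driven roughly once a second by the neighbor-discovery process below: for each interface with RA state it re-joins the multicast groups if needed, and when the multicast timer of an admin-up interface expires it builds an internally generated solicitation (unspecified source and destination) and feeds it back through icmp6-router-solicitation, which emits the actual multicast RA; the gap to the next one is a random value between the configured min and max intervals, clamped while the initial burst of adverts is outstanding.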
*/ +static uword +ip6_neighbor_process_timer_event (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_radv_t *radv_info; + vlib_frame_t * f = 0; + u32 n_this_frame = 0; + u32 n_left_to_next; + u32 * to_next; + u32 bo0; + icmp6_router_solicitation_header_t * h0; + vlib_buffer_t * b0; + f64 now = vlib_time_now (vm); + + /* Interface ip6 radv info list */ + pool_foreach (radv_info, nm->if_radv_pool, ({ + + if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index)) + { + radv_info->initial_adverts_sent = radv_info->initial_adverts_count-1; + radv_info->next_multicast_time = now; + radv_info->last_multicast_time = now; + radv_info->last_radv_time = 0; + radv_info->all_routers_mcast = 0; + continue; + } + + /* Make sure that we've joined the all-routers multicast group */ + if(!radv_info->all_routers_mcast) + { + /* send MDLP_REPORT_EVENT message */ + ip6_neighbor_send_mldpv2_report(radv_info->sw_if_index); + radv_info->all_routers_mcast = 1; + } + + /* is it time to send a multicast RA on this interface? */ + if(radv_info->send_radv && (now >= radv_info->next_multicast_time)) + { + u32 n_to_alloc = 1; + u32 n_allocated; + + f64 rfn = (radv_info->max_radv_interval - radv_info->min_radv_interval) * + random_f64 (&radv_info->seed) + radv_info->min_radv_interval; + + /* multicast send - compute next multicast send time */ + if( radv_info->initial_adverts_sent > 0) + { + radv_info->initial_adverts_sent--; + if(rfn > radv_info-> initial_adverts_interval) + rfn = radv_info-> initial_adverts_interval; + + /* check to see if we are ceasing to send */ + if( radv_info->initial_adverts_sent == 0) + if(radv_info->cease_radv) + radv_info->send_radv = 0; + } + + radv_info->next_multicast_time = rfn + now; + radv_info->last_multicast_time = now; + + /* send advert now - build a "solicted" router advert with unspecified source address */ + n_allocated = vlib_buffer_alloc_from_free_list(vm, + &bo0, + n_to_alloc, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + if (PREDICT_FALSE(n_allocated == 0)) + { + clib_warning ("buffer allocation failure"); + continue; + } + b0 = vlib_get_buffer (vm, bo0); + b0->current_length = sizeof( icmp6_router_solicitation_header_t); + b0->error = ICMP6_ERROR_NONE; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = radv_info->sw_if_index; + + h0 = vlib_buffer_get_current (b0); + + memset (h0, 0, sizeof (icmp6_router_solicitation_header_t)); + + h0->ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28); + h0->ip.payload_length = clib_host_to_net_u16 (sizeof (icmp6_router_solicitation_header_t) + - STRUCT_OFFSET_OF (icmp6_router_solicitation_header_t, neighbor)); + h0->ip.protocol = IP_PROTOCOL_ICMP6; + h0->ip.hop_limit = 255; + + /* set src/dst address as "unspecified" this marks this packet as internally generated rather than recieved */ + h0->ip.src_address.as_u64[0] = 0; + h0->ip.src_address.as_u64[1] = 0; + + h0->ip.dst_address.as_u64[0] = 0; + h0->ip.dst_address.as_u64[1] = 0; + + h0->neighbor.icmp.type = ICMP6_router_solicitation; + + if (PREDICT_FALSE(f == 0)) + { + f = vlib_get_frame_to_node (vm, ip6_icmp_router_solicitation_node.index); + to_next = vlib_frame_vector_args (f); + n_left_to_next = VLIB_FRAME_SIZE; + n_this_frame = 0; + } + + n_this_frame++; + n_left_to_next--; + to_next[0] = bo0; + to_next += 1; + + if (PREDICT_FALSE(n_left_to_next == 0)) + { + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, 
ip6_icmp_router_solicitation_node.index, f); + f = 0; + } + } + })); + + if (f) + { + ASSERT(n_this_frame); + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f); + } + return 0; +} + +static uword +ip6_icmp_neighbor_discovery_event_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + uword event_type; + ip6_icmp_neighbor_discovery_event_data_t * event_data; + + /* init code here */ + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, 1. /* seconds */); + + event_data = vlib_process_get_event_data (vm, &event_type); + + if(!event_data) + { + /* No events found: timer expired. */ + /* process interface list and send RAs as appropriate, update timer info */ + ip6_neighbor_process_timer_event (vm, node, frame); + } + else + { + switch (event_type) { + + case ICMP6_ND_EVENT_INIT: + break; + + case ~0: + break; + + default: + ASSERT (0); + } + + if (event_data) + _vec_len (event_data) = 0; + } + } + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) = { + .function = icmp6_router_advertisement, + .name = "icmp6-router-advertisement", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node = { + + .function = ip6_icmp_neighbor_discovery_event_process, + .name = "ip6-icmp-neighbor-discovery-event-process", + .type = VLIB_NODE_TYPE_PROCESS, +}; + +static uword +icmp6_neighbor_solicitation (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame, /* is_solicitation */ 1); } + +static uword +icmp6_neighbor_advertisement (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame, /* is_solicitation */ 0); } + +VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) = { + .function = icmp6_neighbor_solicitation, + .name = "icmp6-neighbor-solicitation", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = ICMP6_NEIGHBOR_SOLICITATION_N_NEXT, + .next_nodes = { + [ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP] = "error-drop", + [ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY] = "interface-output", + }, +}; + +VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) = { + .function = icmp6_neighbor_advertisement, + .name = "icmp6-neighbor-advertisement", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +/* API support functions */ +int +ip6_neighbor_ra_config(vlib_main_t * vm, u32 sw_if_index, + u8 surpress, u8 managed, u8 other, + u8 ll_option, u8 send_unicast, u8 cease, + u8 use_lifetime, u32 lifetime, + u32 initial_count, u32 initial_interval, + u32 max_interval, u32 min_interval, + u8 is_no) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + int error; + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + error = (ri != ~0) ? 
0 : VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if(!error) + { + + ip6_radv_t * radv_info; + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + if((max_interval != 0) && (min_interval ==0)) + min_interval = .75 * max_interval; + + max_interval = (max_interval != 0) ? ( (is_no) ? DEF_MAX_RADV_INTERVAL : max_interval) : radv_info->max_radv_interval; + min_interval = (min_interval != 0) ? ( (is_no) ? DEF_MIN_RADV_INTERVAL : min_interval) : radv_info->min_radv_interval; + lifetime = (use_lifetime != 0) ? ( (is_no) ? DEF_DEF_RTR_LIFETIME : lifetime) : radv_info->adv_router_lifetime_in_sec; + + if(lifetime) + { + if(lifetime > MAX_DEF_RTR_LIFETIME) + lifetime = MAX_DEF_RTR_LIFETIME; + + if(lifetime <= max_interval) + return VNET_API_ERROR_INVALID_VALUE; + } + + if(min_interval != 0) + { + if((min_interval > .75 * max_interval) || + (min_interval < 3)) + return VNET_API_ERROR_INVALID_VALUE; + } + + if((initial_count > MAX_INITIAL_RTR_ADVERTISEMENTS) || + (initial_interval > MAX_INITIAL_RTR_ADVERT_INTERVAL)) + return VNET_API_ERROR_INVALID_VALUE; + + /* + if "flag" is set and is_no is true then restore default value else set value corresponding to "flag" + if "flag" is clear don't change corresponding value + */ + radv_info->send_radv = (surpress != 0) ? ( (is_no != 0) ? 1 : 0 ) : radv_info->send_radv; + radv_info->adv_managed_flag = ( managed != 0) ? ( (is_no) ? 0 : 1) : radv_info->adv_managed_flag; + radv_info->adv_other_flag = (other != 0) ? ( (is_no) ? 0: 1) : radv_info->adv_other_flag; + radv_info->adv_link_layer_address = ( ll_option != 0) ? ( (is_no) ? 1 : 0) : radv_info->adv_link_layer_address; + radv_info->send_unicast = (send_unicast != 0) ? ( (is_no) ? 0 : 1) : radv_info->send_unicast; + radv_info->cease_radv = ( cease != 0) ? ( (is_no) ? 0 : 1) : radv_info->cease_radv; + + radv_info->min_radv_interval = min_interval; + radv_info->max_radv_interval = max_interval; + radv_info->adv_router_lifetime_in_sec = lifetime; + + radv_info->initial_adverts_count = + (initial_count != 0) ? ( (is_no) ? MAX_INITIAL_RTR_ADVERTISEMENTS : initial_count) : radv_info->initial_adverts_count ; + radv_info->initial_adverts_interval = + (initial_interval != 0) ? ( (is_no) ? MAX_INITIAL_RTR_ADVERT_INTERVAL : initial_interval) : radv_info->initial_adverts_interval; + + /* restart */ + if((cease != 0) && (is_no)) + radv_info-> send_radv = 1; + + radv_info->initial_adverts_sent = radv_info->initial_adverts_count -1; + radv_info->next_multicast_time = vlib_time_now (vm); + radv_info->last_multicast_time = vlib_time_now (vm); + radv_info->last_radv_time = 0; + } + return(error); +} + +int +ip6_neighbor_ra_prefix(vlib_main_t * vm, u32 sw_if_index, + ip6_address_t *prefix_addr, u8 prefix_len, + u8 use_default, u32 val_lifetime, u32 pref_lifetime, + u8 no_advertise, u8 off_link, u8 no_autoconfig, u8 no_onlink, + u8 is_no) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + int error; + + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + error = (ri != ~0) ? 
0 : VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if(!error) + { + f64 now = vlib_time_now (vm); + ip6_radv_t * radv_info; + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + /* prefix info add, delete or update */ + ip6_radv_prefix_t * prefix; + + /* lookup prefix info for this address on this interface */ + uword * p = mhash_get (&radv_info->address_to_prefix_index, prefix_addr); + + prefix = p ? pool_elt_at_index (radv_info->adv_prefixes_pool, p[0]) : 0; + + if(is_no) + { + /* delete */ + if(!prefix) + return VNET_API_ERROR_INVALID_VALUE; /* invalid prefix */ + + if(prefix->prefix_len != prefix_len) + return VNET_API_ERROR_INVALID_VALUE_2; + + /* FIXME - Should the DP do this or the CP ?*/ + /* do specific delete processing here before returning */ + /* try to remove from routing table */ + + mhash_unset (&radv_info->address_to_prefix_index, prefix_addr,/* old_value */ 0); + pool_put (radv_info->adv_prefixes_pool, prefix); + + radv_info->initial_adverts_sent = radv_info->initial_adverts_count -1; + radv_info->next_multicast_time = vlib_time_now (vm); + radv_info->last_multicast_time = vlib_time_now (vm); + radv_info->last_radv_time = 0; + return(error); + } + + /* adding or changing */ + if(!prefix) + { + /* add */ + u32 pi; + pool_get (radv_info->adv_prefixes_pool, prefix); + pi = prefix - radv_info->adv_prefixes_pool; + mhash_set (&radv_info->address_to_prefix_index, prefix_addr, pi, /* old_value */ 0); + + memset(prefix, 0x0, sizeof(ip6_radv_prefix_t)); + + prefix->prefix_len = prefix_len; + memcpy(&prefix->prefix, prefix_addr, sizeof(ip6_address_t)); + + /* initialize default values */ + prefix->adv_on_link_flag = 1; /* L bit set */ + prefix->adv_autonomous_flag = 1; /* A bit set */ + prefix->adv_valid_lifetime_in_secs = DEF_ADV_VALID_LIFETIME; + prefix->adv_pref_lifetime_in_secs = DEF_ADV_PREF_LIFETIME; + prefix->enabled = 1; + prefix->decrement_lifetime_flag = 1; + prefix->deprecated_prefix_flag = 1; + + if(off_link == 0) + { + /* FIXME - Should the DP do this or the CP ?*/ + /* insert prefix into routing table as a connected prefix */ + } + + if(use_default) + goto restart; + } + else + { + + if(prefix->prefix_len != prefix_len) + return VNET_API_ERROR_INVALID_VALUE_2; + + if(off_link != 0) + { + /* FIXME - Should the DP do this or the CP ?*/ + /* remove from routing table if already there */ + } + } + + if((val_lifetime == ~0) || (pref_lifetime == ~0)) + { + prefix->adv_valid_lifetime_in_secs = ~0; + prefix->adv_pref_lifetime_in_secs = ~0; + prefix->decrement_lifetime_flag = 0; + } + else + { + prefix->adv_valid_lifetime_in_secs = val_lifetime;; + prefix->adv_pref_lifetime_in_secs = pref_lifetime; + } + + /* copy remaining */ + prefix->enabled = !(no_advertise != 0); + prefix->adv_on_link_flag = !((off_link != 0) || (no_onlink != 0)); + prefix->adv_autonomous_flag = !(no_autoconfig != 0); + + restart: + /* restart */ + /* fill in the expiration times */ + prefix->valid_lifetime_expires = now + prefix->adv_valid_lifetime_in_secs; + prefix->pref_lifetime_expires = now + prefix->adv_pref_lifetime_in_secs; + + radv_info->initial_adverts_sent = radv_info->initial_adverts_count -1; + radv_info->next_multicast_time = vlib_time_now (vm); + radv_info->last_multicast_time = vlib_time_now (vm); + radv_info->last_radv_time = 0; + } + return(error); +} + +clib_error_t * +ip6_neighbor_cmd(vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + clib_error_t * error = 0; + u8 is_no 
= 0; + u8 surpress = 0, managed = 0, other = 0; + u8 surpress_ll_option = 0, send_unicast = 0, cease= 0; + u8 use_lifetime = 0; + u32 sw_if_index, ra_lifetime = 0, ra_initial_count = 0, ra_initial_interval = 0; + u32 ra_max_interval = 0 , ra_min_interval = 0; + + unformat_input_t _line_input, * line_input = &_line_input; + vnet_sw_interface_t * sw_if0; + + int add_radv_info = 1; + __attribute__((unused)) ip6_radv_t * radv_info = 0; + ip6_address_t ip6_addr; + u32 addr_len; + + + /* Get a line of input. */ + if (! unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + /* get basic radv info for this interface */ + if(unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat_user (line_input, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + u32 ri; + ethernet_interface_t * eth_if0 = 0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index); + if(sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + + if(!eth_if0) + { + error = clib_error_return (0, "Interface must be of ethernet type"); + goto done; + } + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if(ri != ~0) + { + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + } + else + { + error = clib_error_return (0, "unknown interface %U'", + format_unformat_error, line_input); + goto done; + } + } + else + { + error = clib_error_return (0, "invalid interface name %U'", + format_unformat_error, line_input); + goto done; + } + } + + /* get the rest of the command */ + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "no")) + is_no = 1; + else if(unformat (line_input, "prefix %U/%d", + unformat_ip6_address, &ip6_addr, + &addr_len)) + { + add_radv_info = 0; + break; + } + else if (unformat (line_input, "ra-managed-config-flag")) + { + managed = 1; + break; + } + else if (unformat (line_input, "ra-other-config-flag")) + { + other = 1; + break; + } + else if (unformat (line_input, "ra-surpress")) + { + surpress = 1; + break; + } + else if (unformat (line_input, "ra-surpress-link-layer")) + { + surpress_ll_option = 1; + break; + } + else if (unformat (line_input, "ra-send-unicast")) + { + send_unicast = 1; + break; + } + else if (unformat (line_input, "ra-lifetime")) + { + if (!unformat (line_input, "%d", &ra_lifetime)) + return(error = unformat_parse_error (line_input)); + use_lifetime = 1; + break; + } + else if (unformat (line_input, "ra-initial")) + { + if (!unformat (line_input, "%d %d", &ra_initial_count, &ra_initial_interval)) + return(error = unformat_parse_error (line_input)); + break; + } + else if (unformat (line_input, "ra-interval")) + { + if (!unformat (line_input, "%d", &ra_max_interval)) + return(error = unformat_parse_error (line_input)); + + if (!unformat (line_input, "%d", &ra_min_interval)) + ra_min_interval = 0; + break; + } + else if(unformat (line_input, "ra-cease")) + { + cease = 1; + break; + } + else + return(unformat_parse_error (line_input)); + } + + if(add_radv_info) + { + ip6_neighbor_ra_config(vm, sw_if_index, + surpress, managed, other, + surpress_ll_option, send_unicast, cease, + use_lifetime, ra_lifetime, + ra_initial_count, ra_initial_interval, + ra_max_interval, ra_min_interval, + is_no); + } + else + { + u32 valid_lifetime_in_secs = 0; + u32 pref_lifetime_in_secs = 0; + u8 
use_prefix_default_values = 0; + u8 no_advertise = 0; + u8 off_link= 0; + u8 no_autoconfig = 0; + u8 no_onlink= 0; + + /* get the rest of the command */ + while(unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if(unformat (line_input, "default")) + { + use_prefix_default_values = 1; + break; + } + else if(unformat (line_input, "infinite")) + { + valid_lifetime_in_secs = ~0; + pref_lifetime_in_secs = ~0; + break; + } + else if(unformat (line_input, "%d %d", &valid_lifetime_in_secs, + &pref_lifetime_in_secs)) + break; + else + break; + } + + + /* get the rest of the command */ + while (!use_prefix_default_values && + unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if(unformat (line_input, "no-advertise")) + no_advertise = 1; + else if(unformat (line_input, "off-link")) + off_link = 1; + else if(unformat (line_input, "no-autoconfig")) + no_autoconfig = 1; + else if(unformat (line_input, "no-onlink")) + no_onlink = 1; + else + return(unformat_parse_error (line_input)); + } + + ip6_neighbor_ra_prefix(vm, sw_if_index, + &ip6_addr, addr_len, + use_prefix_default_values, + valid_lifetime_in_secs, + pref_lifetime_in_secs, + no_advertise, + off_link, + no_autoconfig, + no_onlink, + is_no); + } + + unformat_free (line_input); + + done: + return error; +} + +static clib_error_t * +show_ip6_interface_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + clib_error_t * error = 0; + u32 sw_if_index; + + sw_if_index = ~0; + + if (unformat_user (input, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if(ri != ~0) + { + ip_lookup_main_t * lm = &ip6_main.lookup_main; + ip6_radv_t * radv_info; + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + vlib_cli_output (vm, "%U is admin %s\n", format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, sw_if_index), + (vnet_sw_interface_is_admin_up (vnm, sw_if_index) ? 
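/* Example invocations accepted by the "ip6 nd" parser above (the interface
 * name GigabitEthernet0/8/0 is only a placeholder; note that "ra-surpress"
 * is the literal keyword spelling used by the parser):
 *
 *   ip6 nd GigabitEthernet0/8/0 ra-interval 100 20
 *   ip6 nd GigabitEthernet0/8/0 ra-lifetime 300
 *   ip6 nd GigabitEthernet0/8/0 ra-surpress
 *   ip6 nd GigabitEthernet0/8/0 prefix 2001:db8::/64 default
 *   ip6 nd GigabitEthernet0/8/0 prefix 2001:db8::/64 86400 14400
 *   ip6 nd GigabitEthernet0/8/0 prefix 2001:db8::/64 infinite no-onlink
 *   ip6 nd GigabitEthernet0/8/0 no prefix 2001:db8::/64
 */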
"up" : "down")); + + u32 ai; + u32 *global_scope = 0,i; + ip_interface_address_t * a; + + vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, sw_if_index, ~0); + ai = lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + + while (ai != (u32)~0) + { + a = pool_elt_at_index(lm->if_address_pool, ai); + ip6_address_t * address = ip_interface_address_get_address (lm, a); + + if( ip6_address_is_link_local_unicast (address)) + vlib_cli_output (vm, "\tIPv6 is enabled, link-local address is %U\n", format_ip6_address, + address); + + if((address->as_u8[0] & 0xe0) == 0x20) + vec_add1 (global_scope, ai); + + ai = a->next_this_sw_interface; + } + + vlib_cli_output (vm, "\tGlobal unicast address(es):\n"); + for (i = 0; i < vec_len (global_scope); i++) + { + a = pool_elt_at_index(lm->if_address_pool, global_scope[i]); + ip6_address_t * address = ip_interface_address_get_address (lm, a); + ip6_address_t mask, subnet; + + subnet = *address; + ip6_address_mask_from_width(&mask, a->address_length); + ip6_address_mask(&subnet, &mask); + + vlib_cli_output (vm, "\t\t%U, subnet is %U/%d", + format_ip6_address, address, + format_ip6_address,&subnet, + a->address_length); + } + vec_free (global_scope); + vlib_cli_output (vm, "\tJoined group address(es):\n"); + ip6_mldp_group_t *m; + pool_foreach (m, radv_info->mldp_group_pool, ({ + vlib_cli_output (vm, "\t\t%U\n", format_ip6_address, &m->mcast_address); + })); + + vlib_cli_output (vm, "\tAdvertised Prefixes:\n"); + ip6_radv_prefix_t * p; + pool_foreach (p, radv_info->adv_prefixes_pool, ({ + vlib_cli_output (vm, "\t\tprefix %U, length %d\n", + format_ip6_address, &p->prefix, p->prefix_len); + })); + + vlib_cli_output (vm, "\tMTU is %d\n", radv_info->adv_link_mtu); + vlib_cli_output (vm, "\tICMP error messages are unlimited\n"); + vlib_cli_output (vm, "\tICMP redirects are disabled\n"); + vlib_cli_output (vm, "\tICMP unreachables are not sent\n"); + vlib_cli_output (vm, "\tND DAD is disabled\n"); + //vlib_cli_output (vm, "\tND reachable time is %d milliseconds\n",); + vlib_cli_output (vm, "\tND advertised reachable time is %d\n", + radv_info->adv_neighbor_reachable_time_in_msec); + vlib_cli_output (vm, "\tND advertised retransmit interval is %d (msec)\n", + radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations); + + u32 ra_interval = radv_info->max_radv_interval; + u32 ra_interval_min = radv_info->min_radv_interval; + vlib_cli_output (vm, "\tND router advertisements are sent every %d seconds (min interval is %d)\n", + ra_interval, ra_interval_min); + vlib_cli_output (vm, "\tND router advertisements live for %d seconds\n", + radv_info->adv_router_lifetime_in_sec); + vlib_cli_output (vm, "\tHosts %s stateless autoconfig for addresses\n", + (radv_info->adv_managed_flag) ? 
"use" :" don't use"); + vlib_cli_output (vm, "\tND router advertisements sent %d\n", radv_info->n_advertisements_sent); + vlib_cli_output (vm, "\tND router solicitations received %d\n", radv_info->n_solicitations_rcvd); + vlib_cli_output (vm, "\tND router solicitations dropped %d\n", radv_info->n_solicitations_dropped); + } + else + { + error = clib_error_return (0, "Ipv6 not enabled on interface", + format_unformat_error, input); + + } + } + return error; +} + +VLIB_CLI_COMMAND (show_ip6_interface_command, static) = { + .path = "show ip6 interface", + .function = show_ip6_interface_cmd, + .short_help = "Show ip6 interface <iface name>", +}; + +clib_error_t * +disable_ip6_interface(vlib_main_t * vm, + u32 sw_if_index) +{ + clib_error_t * error = 0; + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + /* if not created - do nothing */ + if(ri != ~0) + { + vnet_main_t * vnm = vnet_get_main(); + ip6_radv_t * radv_info; + + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + /* check radv_info ref count for other ip6 addresses on this interface */ + if(radv_info->ref_count == 0 ) + { + /* essentially "disables" ipv6 on this interface */ + error = ip6_add_del_interface_address (vm, sw_if_index, + &radv_info->link_local_address, + radv_info->link_local_prefix_len, + 1 /* is_del */); + + ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0/* is_add */); + } + } + return error; +} + +int +ip6_interface_enabled(vlib_main_t * vm, + u32 sw_if_index) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + u32 ri = ~0; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + return ri != ~0; +} + +clib_error_t * +enable_ip6_interface(vlib_main_t * vm, + u32 sw_if_index) +{ + clib_error_t * error = 0; + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + u32 ri; + int is_add = 1; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + /* if not created yet */ + if(ri == ~0) + { + vnet_main_t * vnm = vnet_get_main(); + vnet_sw_interface_t * sw_if0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index); + if(sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + { + ethernet_interface_t * eth_if0; + + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + if(eth_if0) + { + /* create radv_info. for this interface. 
This holds all the info needed for router adverts */ + ri = ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, is_add); + + if(ri != ~0) + { + ip6_radv_t * radv_info; + ip6_address_t link_local_address; + + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + ip6_link_local_address_from_ethernet_mac_address (&link_local_address, + eth_if0->address); + + sw_if0 = vnet_get_sw_interface (vnm, sw_if_index); + if(sw_if0->type == VNET_SW_INTERFACE_TYPE_SUB) + { + /* make up an interface id */ + md5_context_t m; + u8 digest[16]; + + link_local_address.as_u64[0] = radv_info->randomizer; + + md5_init (&m); + md5_add (&m, &link_local_address, 16); + md5_finish (&m, digest); + + memcpy(&link_local_address, digest, 16); + + radv_info->randomizer = link_local_address.as_u64[0]; + + link_local_address.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); + /* clear u bit */ + link_local_address.as_u8[8] &= 0xfd; + } + + /* essentially "enables" ipv6 on this interface */ + error = ip6_add_del_interface_address (vm, sw_if_index, + &link_local_address, 64 /* address width */, + 0 /* is_del */); + + if(error) + ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, !is_add); + else + { + radv_info->link_local_address = link_local_address; + radv_info->link_local_prefix_len = 64; + } + } + } + } + } + return error; +} + +static clib_error_t * +enable_ip6_interface_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + + sw_if_index = ~0; + + if (unformat_user (input, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + enable_ip6_interface(vm, sw_if_index); + } + else + { + error = clib_error_return (0, "unknown interface\n'", + format_unformat_error, input); + + } + return error; +} + +VLIB_CLI_COMMAND (enable_ip6_interface_command, static) = { + .path = "enable ip6 interface", + .function = enable_ip6_interface_cmd, + .short_help = "enable ip6 interface <iface name>", +}; + +static clib_error_t * +disable_ip6_interface_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + + sw_if_index = ~0; + + if (unformat_user (input, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = disable_ip6_interface(vm, sw_if_index); + } + else + { + error = clib_error_return (0, "unknown interface\n'", + format_unformat_error, input); + + } + return error; +} + +VLIB_CLI_COMMAND (disable_ip6_interface_command, static) = { + .path = "disable ip6 interface", + .function = disable_ip6_interface_cmd, + .short_help = "disable ip6 interface <iface name>", +}; + +VLIB_CLI_COMMAND (ip6_nd_command, static) = { + .path = "ip6 nd", + .short_help = "Set ip6 neighbor discovery parameters", + .function = ip6_neighbor_cmd, +}; + +clib_error_t * +set_ip6_link_local_address(vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t *address, + u8 address_length) +{ + clib_error_t * error = 0; + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + u32 ri; + ip6_radv_t * radv_info; + vnet_main_t * vnm = vnet_get_main(); + + if( !ip6_address_is_link_local_unicast (address)) + { + vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_LINK_LOCAL; + return(error = clib_error_return (0, "address not link-local", + format_unformat_error)); + } + + /* call enable ipv6 */ + enable_ip6_interface(vm, sw_if_index); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if(ri != ~0) + { + radv_info = pool_elt_at_index 
(nm->if_radv_pool, ri); + + /* save if link local address (overwrite default) */ + + /* delete the old one */ + error = ip6_add_del_interface_address (vm, sw_if_index, + &radv_info->link_local_address, + radv_info->link_local_prefix_len /* address width */, + 1 /* is_del */); + + if(!error) + { + /* add the new one */ + error = ip6_add_del_interface_address (vm, sw_if_index, + address , + address_length /* address width */, + 0/* is_del */); + + if(!error) + { + radv_info->link_local_address = *address; + radv_info->link_local_prefix_len = address_length; + } + } + } + else + { + vnm->api_errno = VNET_API_ERROR_IP6_NOT_ENABLED; + error = clib_error_return (0, "ip6 not enabled for interface", + format_unformat_error); + } + return error; +} + +clib_error_t * +set_ip6_link_local_address_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + ip6_address_t ip6_addr; + u32 addr_len = 0; + + if (unformat_user (input, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + /* get the rest of the command */ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if(unformat (input, "%U/%d", + unformat_ip6_address, &ip6_addr, + &addr_len)) + break; + else + return(unformat_parse_error (input)); + } + } + error = set_ip6_link_local_address(vm, + sw_if_index, + &ip6_addr, + addr_len); + return error; +} + +VLIB_CLI_COMMAND (set_ip6_link_local_address_command, static) = { + .path = "set ip6 link-local address", + .short_help = "Set ip6 interface link-local address <intfc> <address.>", + .function = set_ip6_link_local_address_cmd, +}; + +/* callback when an interface address is added or deleted */ +static void +ip6_neighbor_add_del_interface_address (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + u32 ri; + vlib_main_t * vm = vnm->vlib_main; + ip6_radv_t * radv_info; + ip6_address_t a; + ip6_mldp_group_t *mcast_group_info; + + /* create solicited node multicast address for this interface adddress */ + ip6_set_solicited_node_multicast_address (&a, 0); + + a.as_u8[0xd] = address->as_u8[0xd]; + a.as_u8[0xe] = address->as_u8[0xe]; + a.as_u8[0xf] = address->as_u8[0xf]; + + if(!is_delete) + { + /* try to create radv_info - does nothing if ipv6 already enabled */ + enable_ip6_interface(vm, sw_if_index); + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + if(ri != ~0) + { + /* get radv_info */ + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + /* add address */ + if( !ip6_address_is_link_local_unicast (address)) + radv_info->ref_count++; + + /* lookup prefix info for this address on this interface */ + uword * p = mhash_get (&radv_info->address_to_mldp_index, &a); + mcast_group_info = p ? 
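/* Worked example of the solicited-node mapping applied to "a" above
 * (RFC 4291: ff02::1:ff00:0/104 plus the low 24 bits of the unicast
 * interface address; the address value is only illustrative):
 *
 *   interface address   2001:db8::abcd:1234
 *   low 24 bits         cd:12:34
 *   solicited-node      ff02::1:ffcd:1234
 */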
pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0; + + /* add -solicted node multicast address */ + if(!mcast_group_info) + { + /* add */ + u32 mi; + pool_get (radv_info->mldp_group_pool, mcast_group_info); + + mi = mcast_group_info - radv_info->mldp_group_pool; + mhash_set (&radv_info->address_to_mldp_index, &a, mi, /* old_value */ 0); + + mcast_group_info->type = 4; + mcast_group_info->mcast_source_address_pool = 0; + mcast_group_info->num_sources = 0; + memcpy(&mcast_group_info->mcast_address, &a, sizeof(ip6_address_t)); + } + } + } + else + { + + /* delete */ + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + if(ri != ~0) + { + /* get radv_info */ + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + /* lookup prefix info for this address on this interface */ + uword * p = mhash_get (&radv_info->address_to_mldp_index, &a); + mcast_group_info = p ? pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0; + + if(mcast_group_info) + { + mhash_unset (&radv_info->address_to_mldp_index, &a,/* old_value */ 0); + pool_put (radv_info->mldp_group_pool, mcast_group_info); + } + + /* if interface up send MLDP "report" */ + radv_info->all_routers_mcast = 0; + + /* add address */ + if( !ip6_address_is_link_local_unicast (address)) + radv_info->ref_count--; + } + } +} + +clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + + nm->limit_neighbor_cache_size = neighbor_limit; + return 0; +} + +static clib_error_t * ip6_neighbor_init (vlib_main_t * vm) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_main_t * im = &ip6_main; + + mhash_init (&nm->neighbor_index_by_key, + /* value size */ sizeof (uword), + /* key size */ sizeof (ip6_neighbor_key_t)); + + icmp6_register_type (vm, ICMP6_neighbor_solicitation, ip6_icmp_neighbor_solicitation_node.index); + icmp6_register_type (vm, ICMP6_neighbor_advertisement, ip6_icmp_neighbor_advertisement_node.index); + icmp6_register_type (vm, ICMP6_router_solicitation, ip6_icmp_router_solicitation_node.index); + icmp6_register_type (vm, ICMP6_router_advertisement, ip6_icmp_router_advertisement_node.index); + + /* handler node for ip6 neighbor discovery events and timers */ + vlib_register_node (vm, &ip6_icmp_neighbor_discovery_event_node); + + /* add call backs */ + ip6_add_del_interface_address_callback_t cb; + memset(&cb, 0x0, sizeof(ip6_add_del_interface_address_callback_t)); + + /* when an interface address changes... 
*/ + cb.function = ip6_neighbor_add_del_interface_address; + cb.function_opaque = 0; + vec_add1 (im->add_del_interface_address_callbacks, cb); + + mhash_init (&nm->pending_resolutions_by_address, + /* value size */ sizeof (uword), + /* key size */ sizeof (ip6_address_t)); + + /* default, configurable */ + nm->limit_neighbor_cache_size = 50000; + +#if 0 + /* $$$$ Hack fix for today */ + vec_validate_init_empty + (im->discover_neighbor_next_index_by_hw_if_index, 32, 0 /* drop */); +#endif + + return 0; +} + +VLIB_INIT_FUNCTION (ip6_neighbor_init); + + +void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm, + void * address_arg, + uword node_index, + uword type_opaque, + uword data) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_address_t * address = address_arg; + uword * p; + pending_resolution_t * pr; + + pool_get (nm->pending_resolutions, pr); + + pr->next_index = ~0; + pr->node_index = node_index; + pr->type_opaque = type_opaque; + pr->data = data; + + p = mhash_get (&nm->pending_resolutions_by_address, address); + if (p) + { + /* Insert new resolution at the head of the list */ + pr->next_index = p[0]; + mhash_unset (&nm->pending_resolutions_by_address, address, 0); + } + + mhash_set (&nm->pending_resolutions_by_address, address, + pr - nm->pending_resolutions, 0 /* old value */); +} + diff --git a/vnet/vnet/ip/ip6_packet.h b/vnet/vnet/ip/ip6_packet.h new file mode 100644 index 00000000000..9a52cf72586 --- /dev/null +++ b/vnet/vnet/ip/ip6_packet.h @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip6/packet.h: ip6 packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_ip6_packet_h +#define included_ip6_packet_h + +typedef union { + u8 as_u8[16]; + u16 as_u16[8]; + u32 as_u32[4]; + u64 as_u64[2]; + uword as_uword[16 / sizeof (uword)]; +} ip6_address_t; + +/* Packed so that the mhash key doesn't include uninitialized pad bytes */ +typedef CLIB_PACKED (struct { + /* IP address must be first for ip_interface_address_get_address() to work */ + ip6_address_t ip6_addr; + u32 fib_index; +}) ip6_address_fib_t; + +always_inline void +ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address, + u32 fib_index) +{ + addr_fib->ip6_addr.as_u64[0] = address->as_u64[0]; + addr_fib->ip6_addr.as_u64[1] = address->as_u64[1]; + addr_fib->fib_index = fib_index; +} + +/* Special addresses: + unspecified ::/128 + loopback ::1/128 + global unicast 2000::/3 + unique local unicast fc00::/7 + link local unicast fe80::/10 + multicast ff00::/8 + ietf reserved everything else. */ + +#define foreach_ip6_multicast_address_scope \ + _ (loopback, 0x1) \ + _ (link_local, 0x2) \ + _ (admin_local, 0x4) \ + _ (site_local, 0x5) \ + _ (organization_local, 0x8) \ + _ (global, 0xe) + +#define foreach_ip6_multicast_link_local_group_id \ + _ (all_hosts, 0x1) \ + _ (all_routers, 0x2) \ + _ (rip_routers, 0x9) \ + _ (eigrp_routers, 0xa) \ + _ (pim_routers, 0xd) \ + _ (mldv2_routers, 0x16) + +typedef enum { +#define _(f,n) IP6_MULTICAST_SCOPE_##f = n, + foreach_ip6_multicast_address_scope +#undef _ +} ip6_multicast_address_scope_t; + +typedef enum { +#define _(f,n) IP6_MULTICAST_GROUP_ID_##f = n, + foreach_ip6_multicast_link_local_group_id +#undef _ +} ip6_multicast_link_local_group_id_t; + +always_inline uword +ip6_address_is_multicast (ip6_address_t * a) +{ return a->as_u8[0] == 0xff; } + +always_inline void +ip6_set_reserved_multicast_address (ip6_address_t * a, + ip6_multicast_address_scope_t scope, + u16 id) +{ + a->as_u64[0] = a->as_u64[1] = 0; + a->as_u16[0] = clib_host_to_net_u16 (0xff00 | scope); + a->as_u16[7] = clib_host_to_net_u16 (id); +} + +always_inline void +ip6_set_solicited_node_multicast_address (ip6_address_t * a, u32 id) +{ + /* 0xff02::1:ffXX:XXXX. */ + a->as_u64[0] = a->as_u64[1] = 0; + a->as_u16[0] = clib_host_to_net_u16 (0xff02); + a->as_u8[11] = 1; + ASSERT ((id >> 24) == 0); + id |= 0xff << 24; + a->as_u32[3] = clib_host_to_net_u32 (id); +} + +always_inline void +ip6_link_local_address_from_ethernet_address (ip6_address_t * a, u8 * ethernet_address) +{ + a->as_u64[0] = a->as_u64[1] = 0; + a->as_u16[0] = clib_host_to_net_u16 (0xfe80); + /* Always set locally administered bit (6). 
*/ + a->as_u8[0x8] = ethernet_address[0] | (1 << 6); + a->as_u8[0x9] = ethernet_address[1]; + a->as_u8[0xa] = ethernet_address[2]; + a->as_u8[0xb] = 0xff; + a->as_u8[0xc] = 0xfe; + a->as_u8[0xd] = ethernet_address[3]; + a->as_u8[0xe] = ethernet_address[4]; + a->as_u8[0xf] = ethernet_address[5]; +} + +always_inline void +ip6_multicast_ethernet_address (u8 * ethernet_address, u32 group_id) +{ + ethernet_address[0] = 0x33; + ethernet_address[1] = 0x33; + ethernet_address[2] = ((group_id >> 24) & 0xff); + ethernet_address[3] = ((group_id >> 16) & 0xff); + ethernet_address[4] = ((group_id >> 8) & 0xff); + ethernet_address[5] = ((group_id >> 0) & 0xff); +} + +always_inline uword +ip6_address_is_equal (ip6_address_t * a, ip6_address_t * b) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + if (a->as_uword[i] != b->as_uword[i]) + return 0; + return 1; +} + +always_inline uword +ip6_address_is_equal_masked (ip6_address_t * a, ip6_address_t * b, + ip6_address_t * mask) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + { + uword a_masked, b_masked; + a_masked = a->as_uword[i] & mask->as_uword[i]; + b_masked = b->as_uword[i] & mask->as_uword[i]; + + if (a_masked != b_masked) + return 0; + } + return 1; +} + +always_inline void +ip6_address_mask (ip6_address_t * a, ip6_address_t * mask) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + a->as_uword[i] &= mask->as_uword[i]; +} + +always_inline void +ip6_address_set_zero (ip6_address_t * a) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + a->as_uword[i] = 0; +} + +always_inline void +ip6_address_mask_from_width (ip6_address_t * a, u32 width) +{ + int i, byte, bit, bitnum; + ASSERT (width <= 128); + memset (a, 0, sizeof (a[0])); + for (i = 0; i < width; i++) + { + bitnum = (7 - (i & 7)); + byte = i / 8; + bit = 1<<bitnum; + a->as_u8[byte] |= bit; + } +} + +always_inline uword +ip6_address_is_zero (ip6_address_t * a) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + if (a->as_uword[i] != 0) + return 0; + return 1; +} + +/* Check for unspecified address ::0 */ +always_inline uword +ip6_address_is_unspecified (ip6_address_t * a) +{ return ip6_address_is_zero (a); } + +/* Check for loopback address ::1 */ +always_inline uword +ip6_address_is_loopback (ip6_address_t * a) +{ + uword is_loopback; + u8 save = a->as_u8[15]; + a->as_u8[15] = save ^ 1; + is_loopback = ip6_address_is_zero (a); + a->as_u8[15] = save; + return is_loopback; +} + +/* Check for link local unicast fe80::/10. */ +always_inline uword +ip6_address_is_link_local_unicast (ip6_address_t * a) +{ return a->as_u8[0] == 0xfe && (a->as_u8[1] & 0xc0) == 0x80; } + +/* Check for unique local unicast fc00::/7. */ +always_inline uword +ip6_address_is_local_unicast (ip6_address_t * a) +{ return (a->as_u8[0] & 0xfe) == 0xfc; } + +/* Check for solicited node multicast 0xff02::1:ff00:0/104 */ +always_inline uword +ip6_is_solicited_node_multicast_address (ip6_address_t * a) +{ + return (a->as_u32[0] == clib_host_to_net_u32 (0xff020000) + && a->as_u32[1] == 0 + && a->as_u32[2] == clib_host_to_net_u32 (1) + && a->as_u8[12] == 0xff); +} + +typedef struct { + /* 4 bit version, 8 bit traffic class and 20 bit flow label. */ + u32 ip_version_traffic_class_and_flow_label; + + /* Total packet length not including this header (but including + any extension headers if present). */ + u16 payload_length; + + /* Protocol for next header. */ + u8 protocol; + + /* Hop limit decremented by router at each hop. */ + u8 hop_limit; + + /* Source and destination address. 
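/* Sketch: extracting the three sub-fields packed into
 * ip_version_traffic_class_and_flow_label above.  The field is kept in
 * network byte order; the 4/8/20-bit split matches the packet-generator
 * bitfield setup in ip6_pg.c later in this patch.  "ip6" is assumed to
 * point at a parsed ip6_header_t: */
#if 0
  u32 vtf = clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label);
  u8  version       = vtf >> 28;            /* top 4 bits, expected to be 6 */
  u8  traffic_class = (vtf >> 20) & 0xff;   /* next 8 bits                  */
  u32 flow_label    = vtf & 0x000fffff;     /* low 20 bits                  */
#endif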
*/ + ip6_address_t src_address, dst_address; +} ip6_header_t; + +always_inline void * +ip6_next_header (ip6_header_t * i) +{ return (void *) (i + 1); } + +always_inline void +ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) +{ + { + ip6_address_t src0, dst0; + + src0 = ip0->src_address; + dst0 = ip0->dst_address; + ip0->src_address = dst0; + ip0->dst_address = src0; + } + + { + u16 src0, dst0; + + src0 = tcp0->ports.src; + dst0 = tcp0->ports.dst; + tcp0->ports.src = dst0; + tcp0->ports.dst = src0; + } +} + +always_inline void +ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1, + tcp_header_t * tcp0, tcp_header_t * tcp1) +{ + { + ip6_address_t src0, dst0, src1, dst1; + + src0 = ip0->src_address; + src1 = ip1->src_address; + dst0 = ip0->dst_address; + dst1 = ip1->dst_address; + ip0->src_address = dst0; + ip1->src_address = dst1; + ip0->dst_address = src0; + ip1->dst_address = src1; + } + + { + u16 src0, dst0, src1, dst1; + + src0 = tcp0->ports.src; + src1 = tcp1->ports.src; + dst0 = tcp0->ports.dst; + dst1 = tcp1->ports.dst; + tcp0->ports.src = dst0; + tcp1->ports.src = dst1; + tcp0->ports.dst = src0; + tcp1->ports.dst = src1; + } +} + + +typedef CLIB_PACKED (struct { + u8 data; +}) ip6_pad1_option_t; + +typedef CLIB_PACKED (struct { + u8 type; + u8 len; + u8 data[0]; +}) ip6_padN_option_t; + +typedef CLIB_PACKED (struct { +#define IP6_MLDP_ALERT_TYPE 0x5 + u8 type; + u8 len; + u16 value; +}) ip6_router_alert_option_t; + +typedef CLIB_PACKED (struct { + u8 next_hdr; + /* Length of this header plus option data in 8 byte units. */ + u8 n_data_u64s; + u8 data[0]; +}) ip6_hop_by_hop_ext_t; + +typedef CLIB_PACKED (struct { + u8 next_hdr; + u8 rsv; + u16 fragment_offset_and_more; + u32 identification; +}) ip6_frag_hdr_t; + +#define ip6_frag_hdr_offset(hdr) \ + (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3) + +#define ip6_frag_hdr_more(hdr) \ + (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1) + +#define ip6_frag_hdr_offset_and_more(offset, more) \ + clib_host_to_net_u16(((offset) << 3) + !!(more)) + +#endif /* included_ip6_packet_h */ diff --git a/vnet/vnet/ip/ip6_pg.c b/vnet/vnet/ip/ip6_pg.c new file mode 100644 index 00000000000..2c3852765d4 --- /dev/null +++ b/vnet/vnet/ip/ip6_pg.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip6_pg: IP v4 packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +static void +ip6_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = pg->vlib_main; + u32 ip_header_offset = g->start_byte_offset; + + while (n_packets >= 2) + { + u32 pi0, pi1; + vlib_buffer_t * p0, * p1; + ip6_header_t * ip0, * ip1; + + pi0 = packets[0]; + pi1 = packets[1]; + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + n_packets -= 2; + packets += 2; + + ip0 = (void *) (p0->data + ip_header_offset); + ip1 = (void *) (p1->data + ip_header_offset); + + ip0->payload_length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - ip_header_offset - sizeof (ip0[0])); + ip1->payload_length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) - ip_header_offset - sizeof (ip1[0])); + } + + while (n_packets >= 1) + { + u32 pi0; + vlib_buffer_t * p0; + ip6_header_t * ip0; + + pi0 = packets[0]; + p0 = vlib_get_buffer (vm, pi0); + n_packets -= 1; + packets += 1; + + ip0 = (void *) (p0->data + ip_header_offset); + + ip0->payload_length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - ip_header_offset - sizeof (ip0[0])); + } +} + +typedef struct { + pg_edit_t ip_version; + pg_edit_t traffic_class; + pg_edit_t flow_label; + pg_edit_t payload_length; + pg_edit_t protocol; + pg_edit_t hop_limit; + pg_edit_t src_address, dst_address; +} pg_ip6_header_t; + +static inline void +pg_ip6_header_init (pg_ip6_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, ip6_header_t, f); + _ (payload_length); + _ (hop_limit); + _ (protocol); + _ (src_address); + _ (dst_address); +#undef _ + + /* Initialize bit fields. 
*/ + pg_edit_init_bitfield (&p->ip_version, ip6_header_t, + ip_version_traffic_class_and_flow_label, + 28, 4); + pg_edit_init_bitfield (&p->traffic_class, ip6_header_t, + ip_version_traffic_class_and_flow_label, + 20, 8); + pg_edit_init_bitfield (&p->flow_label, ip6_header_t, + ip_version_traffic_class_and_flow_label, + 0, 20); +} + +uword +unformat_pg_ip6_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_ip6_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip6_header_t), + &group_index); + pg_ip6_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->ip_version, 6); + pg_edit_set_fixed (&p->traffic_class, 0); + pg_edit_set_fixed (&p->flow_label, 0); + pg_edit_set_fixed (&p->hop_limit, 64); + + p->payload_length.type = PG_EDIT_UNSPECIFIED; + + if (! unformat (input, "%U: %U -> %U", + unformat_pg_edit, + unformat_ip_protocol, &p->protocol, + unformat_pg_edit, + unformat_ip6_address, &p->src_address, + unformat_pg_edit, + unformat_ip6_address, &p->dst_address)) + goto error; + + /* Parse options. */ + while (1) + { + if (unformat (input, "version %U", + unformat_pg_edit, + unformat_pg_number, &p->ip_version)) + ; + + else if (unformat (input, "traffic-class %U", + unformat_pg_edit, + unformat_pg_number, &p->traffic_class)) + ; + + else if (unformat (input, "length %U", + unformat_pg_edit, + unformat_pg_number, &p->payload_length)) + ; + + else if (unformat (input, "hop-limit %U", + unformat_pg_edit, + unformat_pg_number, &p->hop_limit)) + ; + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t * im = &ip_main; + ip_protocol_t protocol; + ip_protocol_info_t * pi; + + pi = 0; + if (p->protocol.type == PG_EDIT_FIXED) + { + protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO); + pi = ip_get_protocol_info (im, protocol); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->payload_length.type == PG_EDIT_UNSPECIFIED + && s->min_packet_bytes == s->max_packet_bytes + && group_index + 1 < vec_len (s->edit_groups)) + { + pg_edit_set_fixed (&p->payload_length, + pg_edit_group_n_bytes (s, group_index) - sizeof (ip6_header_t)); + } + + p = pg_get_edit_group (s, group_index); + if (p->payload_length.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = ip6_pg_edit_function; + } + + return 1; + } + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + diff --git a/vnet/vnet/ip/ip_checksum.c b/vnet/vnet/ip/ip_checksum.c new file mode 100644 index 00000000000..23e7889bc7e --- /dev/null +++ b/vnet/vnet/ip/ip_checksum.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
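/* Header strings accepted by unformat_pg_ip6_header above follow the
 * pattern "<protocol>: <src> -> <dst>", optionally followed by "version",
 * "traffic-class", "length" and "hop-limit" edits.  Illustrative examples
 * (addresses and protocol name are placeholders; how such a string is
 * embedded in a complete packet-generator stream definition is outside
 * this file):
 *
 *   udp: 2001:db8::1 -> 2001:db8::2
 *   udp: 2001:db8::1 -> 2001:db8::2 hop-limit 32
 */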
+ */ +/* + * ip4/ip_checksum.c: ip/tcp/udp checksums + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +ip_csum_t +ip_incremental_checksum (ip_csum_t sum, void * _data, uword n_bytes) +{ + uword data = pointer_to_uword (_data); + ip_csum_t sum0, sum1; + + sum0 = 0; + sum1 = sum; + + /* Align data pointer to 64 bits. */ +#define _(t) \ +do { \ + if (n_bytes >= sizeof (t) \ + && sizeof (t) < sizeof (ip_csum_t) \ + && (data % (2 * sizeof (t))) != 0) \ + { \ + sum0 += * uword_to_pointer (data, t *); \ + data += sizeof (t); \ + n_bytes -= sizeof (t); \ + } \ +} while (0) + + _ (u8); + _ (u16); + if (BITS (ip_csum_t) > 32) + _ (u32); + +#undef _ + + { + ip_csum_t * d = uword_to_pointer (data, ip_csum_t *); + + while (n_bytes >= 2 * sizeof (d[0])) + { + sum0 = ip_csum_with_carry (sum0, d[0]); + sum1 = ip_csum_with_carry (sum1, d[1]); + d += 2; + n_bytes -= 2 * sizeof (d[0]); + } + + data = pointer_to_uword (d); + } + +#define _(t) \ +do { \ + if (n_bytes >= sizeof (t) && sizeof (t) <= sizeof (ip_csum_t)) \ + { \ + sum0 = ip_csum_with_carry (sum0, * uword_to_pointer (data, t *)); \ + data += sizeof (t); \ + n_bytes -= sizeof (t); \ + } \ +} while (0) + + if (BITS (ip_csum_t) > 32) + _ (u64); + _ (u32); + _ (u16); + _ (u8); + +#undef _ + + /* Combine even and odd sums. 
*/ + sum0 = ip_csum_with_carry (sum0, sum1); + + return sum0; +} + +ip_csum_t +ip_csum_and_memcpy (ip_csum_t sum, void * dst, void * src, uword n_bytes) +{ + uword n_left, n_left_odd; + ip_csum_t * dst_even, * src_even; + ip_csum_t sum0 = sum, sum1; + + dst_even = uword_to_pointer + (pointer_to_uword (dst) &~ (sizeof (sum) - 1), + ip_csum_t *); + src_even = src; + + n_left = n_bytes; + if ((n_left_odd = dst - (void *) dst_even)) + { + u8 * d8 = dst, * s8 = src; + uword i, n_copy_odd; + + n_copy_odd = clib_min (n_left, n_left_odd); + + for (i = 0; i < n_copy_odd; i++) + d8[i] = s8[i]; + + if (n_copy_odd != n_left_odd) + return sum0; + + sum0 = ip_csum_with_carry (sum0, dst_even[0]); + dst_even += 1; + src_even = (void *) (src + n_copy_odd); + n_left -= n_left_odd; + } + + sum1 = 0; + while (n_left >= 2 * sizeof (dst_even[0])) + { + ip_csum_t dst0, dst1; + + dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t); + dst1 = clib_mem_unaligned (&src_even[1], ip_csum_t); + + dst_even[0] = dst0; + dst_even[1] = dst1; + + dst_even += 2; + src_even += 2; + n_left -= 2 * sizeof (dst_even[0]); + + sum0 = ip_csum_with_carry (sum0, dst0); + sum1 = ip_csum_with_carry (sum1, dst1); + } + + if (n_left >= 1 * sizeof (dst_even[0])) + { + ip_csum_t dst0; + + dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t); + + dst_even[0] = dst0; + + dst_even += 1; + src_even += 1; + n_left -= 1 * sizeof (dst_even[0]); + + sum0 = ip_csum_with_carry (sum0, dst0); + } + + if (n_left > 0) + { + u8 * d8 = dst, * s8 = src; + uword i; + for (i = 0; i < n_left; i++) + d8[i] = s8[i]; + } + + return ip_csum_with_carry (sum0, sum1); +} diff --git a/vnet/vnet/ip/ip_frag.c b/vnet/vnet/ip/ip_frag.c new file mode 100644 index 00000000000..22176187a9c --- /dev/null +++ b/vnet/vnet/ip/ip_frag.c @@ -0,0 +1,449 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
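/* For orientation: the routines above accumulate the standard one's-
 * complement Internet checksum a machine word at a time.  A byte-at-a-time
 * reference version (RFC 1071 style), standalone and unoptimized, is
 * sketched below; it is not a drop-in replacement for the incremental
 * ip_csum_t API above: */
#if 0
static u16
ip_checksum_reference (const u8 * data, uword n_bytes)
{
  u32 sum = 0;
  uword i;

  /* sum the data as 16-bit big-endian words */
  for (i = 0; i + 1 < n_bytes; i += 2)
    sum += (data[i] << 8) | data[i + 1];

  /* an odd trailing byte is padded with zero */
  if (n_bytes & 1)
    sum += data[n_bytes - 1] << 8;

  /* fold carries back into the low 16 bits */
  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);

  return ~sum & 0xffff;   /* one's complement of the sum, host byte order */
}
#endif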
+ *--------------------------------------------------------------------------- + */ +/* + * IPv4 Fragmentation Node + * + * + */ + +#include "ip_frag.h" + +#include <vnet/ip/ip.h> + + +typedef struct { + u8 ipv6; + u16 header_offset; + u16 mtu; + u8 next; + u16 n_fragments; +} ip_frag_trace_t; + +static u8 * format_ip_frag_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip_frag_trace_t * t = va_arg (*args, ip_frag_trace_t *); + s = format(s, "IPv%s offset: %u mtu: %u fragments: %u next: %s", + t->ipv6?"6":"4", + t->header_offset, t->mtu, t->n_fragments, node->next_node_names[t->next]); + return s; +} + +static u32 running_fragment_id; + +static void +ip4_frag_do_fragment(vlib_main_t *vm, u32 pi, u32 **buffer, ip_frag_error_t *error) +{ + vlib_buffer_t *p; + ip4_header_t *ip4; + u16 mtu, ptr, len, max, rem, + offset, ip_frag_id, ip_frag_offset; + u8 *packet, more; + + vec_add1(*buffer, pi); + p = vlib_get_buffer(vm, pi); + offset = vnet_buffer(p)->ip_frag.header_offset; + mtu = vnet_buffer(p)->ip_frag.mtu; + packet = (u8 *)vlib_buffer_get_current(p); + ip4 = (ip4_header_t *)(packet + offset); + + rem = clib_net_to_host_u16(ip4->length) - sizeof(*ip4); + ptr = 0; + max = (mtu - sizeof(*ip4) - vnet_buffer(p)->ip_frag.header_offset) & ~0x7; + + if (rem < (p->current_length - offset - sizeof(*ip4))) { + *error = IP_FRAG_ERROR_MALFORMED; + return; + } + + if (mtu < sizeof(*ip4)) { + *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER; + return; + } + + if (ip4->flags_and_fragment_offset & + clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT)) { + *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET; + return; + } + + if (ip4_is_fragment(ip4)) { + ip_frag_id = ip4->fragment_id; + ip_frag_offset = ip4_get_fragment_offset(ip4); + more = !!(ip4->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS)); + } else { + ip_frag_id = (++running_fragment_id); + ip_frag_offset = 0; + more = 0; + } + + //Do the actual fragmentation + while (rem) { + u32 bi; + vlib_buffer_t *b; + ip4_header_t *fip4; + + len = (rem > (mtu - sizeof(*ip4) - vnet_buffer(p)->ip_frag.header_offset)) ? max : rem; + + if (ptr == 0) { + bi = pi; + b = p; + fip4 = (ip4_header_t *)(vlib_buffer_get_current(b) + offset); + } else { + if (!vlib_buffer_alloc(vm, &bi, 1)) { + *error = IP_FRAG_ERROR_MEMORY; + return; + } + vec_add1(*buffer, bi); + b = vlib_get_buffer(vm, bi); + vnet_buffer(b)->sw_if_index[VLIB_RX] = vnet_buffer(p)->sw_if_index[VLIB_RX]; + vnet_buffer(b)->sw_if_index[VLIB_TX] = vnet_buffer(p)->sw_if_index[VLIB_TX]; + fip4 = (ip4_header_t *)(vlib_buffer_get_current(b) + offset); + + //Copy offset and ip4 header + memcpy(b->data, packet, offset + sizeof(*ip4)); + //Copy data + memcpy(((u8*)(fip4)) + sizeof(*fip4), + packet + offset + sizeof(*fip4) + ptr, len); + } + b->current_length = offset + len + sizeof(*fip4); + + fip4->fragment_id = ip_frag_id; + fip4->flags_and_fragment_offset = clib_host_to_net_u16((ptr >> 3) + ip_frag_offset); + fip4->flags_and_fragment_offset |= clib_host_to_net_u16(((len != rem) || more) << 13); + // ((len0 != rem0) || more0) << 13 is optimization for + // ((len0 != rem0) || more0) ? 
IP4_HEADER_FLAG_MORE_FRAGMENTS : 0 + fip4->length = clib_host_to_net_u16(len + sizeof(*fip4)); + fip4->checksum = ip4_header_checksum(fip4); + + if(vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) { + //Encapsulating ipv4 header + ip4_header_t *encap_header4 = (ip4_header_t *)vlib_buffer_get_current(b); + encap_header4->length = clib_host_to_net_u16(b->current_length); + encap_header4->checksum = ip4_header_checksum(encap_header4); + } else if (vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) { + //Encapsulating ipv6 header + ip6_header_t *encap_header6 = (ip6_header_t *)vlib_buffer_get_current(b); + encap_header6->payload_length = clib_host_to_net_u16(b->current_length - sizeof(*encap_header6)); + } + + rem -= len; + ptr += len; + } +} + + +static uword +ip4_frag (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t * error_node = vlib_node_get_runtime(vm, ip4_frag_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 frag_sent = 0, small_packets = 0; + u32 *buffer = 0; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0, *frag_from, frag_left; + vlib_buffer_t *p0; + ip_frag_error_t error0; + ip4_frag_next_t next0; + + //Note: The packet is not enqueued now. + //It is instead put in a vector where other fragments + //will be put as well. + pi0 = from[0]; + from += 1; + n_left_from -= 1; + error0 = IP_FRAG_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + ip4_frag_do_fragment(vm, pi0, &buffer, &error0); + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + ip_frag_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof (*tr)); + tr->header_offset = vnet_buffer(p0)->ip_frag.header_offset; + tr->mtu = vnet_buffer(p0)->ip_frag.mtu; + tr->ipv6 = 0; + tr->n_fragments = vec_len(buffer); + tr->next = vnet_buffer(p0)->ip_frag.next_index; + } + + next0 = (error0 == IP_FRAG_ERROR_NONE) ? 
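/* Worked example of the sizing logic in ip4_frag_do_fragment above, for an
 * un-encapsulated packet (header_offset 0), a 20-byte IPv4 header, MTU 1500
 * and 4000 bytes of IP payload:
 *
 *   max per-fragment payload = (1500 - 20) & ~0x7 = 1480 bytes
 *   fragment 1: 1480 bytes, offset   0 (8-byte units), MF set
 *   fragment 2: 1480 bytes, offset 185,                MF set
 *   fragment 3: 1040 bytes, offset 370,                MF clear
 *
 * i.e. the offset field advances by len/8 per fragment and the
 * more-fragments bit (1 << 13 in flags_and_fragment_offset) is set on every
 * fragment except the last.
 */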
vnet_buffer(p0)->ip_frag.next_index : IP4_FRAG_NEXT_DROP; + frag_sent += vec_len(buffer); + small_packets += (vec_len(buffer) == 1); + + //Send fragments that were added in the frame + frag_from = buffer; + frag_left = vec_len(buffer); + while (frag_left > 0) { + while (frag_left > 0 && n_left_to_next > 0) { + u32 i; + i = to_next[0] = frag_from[0]; + frag_from += 1; + frag_left -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_get_buffer(vm, i)->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, i, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + } + vec_reset_length(buffer); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + vec_free(buffer); + vlib_node_increment_counter(vm, ip4_frag_node.index, IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent); + vlib_node_increment_counter(vm, ip4_frag_node.index, IP_FRAG_ERROR_SMALL_PACKET, small_packets); + + return frame->n_vectors; +} + + +static void +ip6_frag_do_fragment(vlib_main_t *vm, u32 pi, u32 **buffer, ip_frag_error_t *error) +{ + vlib_buffer_t *p; + ip6_header_t *ip6_hdr; + ip6_frag_hdr_t *frag_hdr; + u8 *payload, *next_header; + + p = vlib_get_buffer(vm, pi); + + //Parsing the IPv6 headers + ip6_hdr = vlib_buffer_get_current(p) + vnet_buffer(p)->ip_frag.header_offset; + payload = (u8 *)(ip6_hdr + 1); + next_header = &ip6_hdr->protocol; + if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) { + next_header = payload; + payload += payload[1] * 8; + } + + if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS) { + next_header = payload; + payload += payload[1] * 8; + } + + if (*next_header == IP_PROTOCOL_IPV6_ROUTE) { + next_header = payload; + payload += payload[1] * 8; + } + + u8 has_more; + u16 initial_offset; + if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION) { + //The fragmentation header is already there + frag_hdr = (ip6_frag_hdr_t *)payload; + has_more = ip6_frag_hdr_more(frag_hdr); + initial_offset = ip6_frag_hdr_offset(frag_hdr); + } else { + //Insert a fragmentation header in the packet + u8 nh = *next_header; + *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION; + vlib_buffer_advance(p, -sizeof(*frag_hdr)); + u8 *start = vlib_buffer_get_current(p); + memmove(start, start + sizeof(*frag_hdr), payload - (start + sizeof(*frag_hdr))); + frag_hdr = (ip6_frag_hdr_t *)(payload - sizeof(*frag_hdr)); + frag_hdr->identification = ++running_fragment_id; + frag_hdr->next_hdr = nh; + frag_hdr->rsv = 0; + has_more = 0; + initial_offset = 0; + } + payload = (u8 *)(frag_hdr + 1); + + u16 headers_len = payload - (u8 *)vlib_buffer_get_current(p); + u16 max_payload = vnet_buffer(p)->ip_frag.mtu - headers_len; + u16 rem = p->current_length - headers_len; + u16 ptr = 0; + + if(max_payload < 8) { + *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER; + return; + } + + while (rem) { + u32 bi; + vlib_buffer_t *b; + u16 len = (rem > max_payload)?(max_payload & ~0x7):rem; + rem -= len; + + if (ptr != 0) { + if (!vlib_buffer_alloc(vm, &bi, 1)) { + *error = IP_FRAG_ERROR_MEMORY; + return; + } + b = vlib_get_buffer(vm, bi); + vnet_buffer(b)->sw_if_index[VLIB_RX] = vnet_buffer(p)->sw_if_index[VLIB_RX]; + vnet_buffer(b)->sw_if_index[VLIB_TX] = vnet_buffer(p)->sw_if_index[VLIB_TX]; + memcpy(vlib_buffer_get_current(b), vlib_buffer_get_current(p), headers_len); + memcpy(vlib_buffer_get_current(b) + headers_len, payload + ptr, len); + frag_hdr = vlib_buffer_get_current(b) + headers_len - 
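/* The fragment-header macros from ip6_packet.h used here keep the offset in
 * 8-octet units in the upper 13 bits of fragment_offset_and_more, with the
 * M ("more fragments") flag in the low bit.  A small sketch (values are
 * illustrative): */
#if 0
  /* encode: fragment starting at byte offset 1480, more fragments follow */
  frag_hdr->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (1480 / 8, 1);

  /* decode */
  u16 offset_units = ip6_frag_hdr_offset (frag_hdr);   /* 185  */
  u32 byte_offset  = (u32) offset_units * 8;            /* 1480 */
  int more         = ip6_frag_hdr_more (frag_hdr);      /* 1    */
#endif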
sizeof(*frag_hdr); + } else { + bi = pi; + b = vlib_get_buffer(vm, bi); + //frag_hdr already set here + } + + ip6_hdr = vlib_buffer_get_current(b) + vnet_buffer(p)->ip_frag.header_offset; + frag_hdr->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(initial_offset + (ptr >> 3), (rem || has_more)); + b->current_length = headers_len + len; + ip6_hdr->payload_length = clib_host_to_net_u16(b->current_length - vnet_buffer(p)->ip_frag.header_offset - sizeof(*ip6_hdr)); + + if(vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) { + //Encapsulating ipv4 header + ip4_header_t *encap_header4 = (ip4_header_t *)vlib_buffer_get_current(b); + encap_header4->length = clib_host_to_net_u16(b->current_length); + encap_header4->checksum = ip4_header_checksum(encap_header4); + } else if (vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) { + //Encapsulating ipv6 header + ip6_header_t *encap_header6 = (ip6_header_t *)vlib_buffer_get_current(b); + encap_header6->payload_length = clib_host_to_net_u16(b->current_length - sizeof(*encap_header6)); + } + + vec_add1(*buffer, bi); + + ptr += len; + } +} + +static uword +ip6_frag (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_frag_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 frag_sent = 0, small_packets = 0; + u32 *buffer = 0; + + while (n_left_from > 0) { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0, *frag_from, frag_left; + vlib_buffer_t * p0; + ip_frag_error_t error0; + ip6_frag_next_t next0; + + pi0 = from[0]; + from += 1; + n_left_from -= 1; + error0 = IP_FRAG_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + ip6_frag_do_fragment(vm, pi0, &buffer, &error0); + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + ip_frag_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof (*tr)); + tr->header_offset = vnet_buffer(p0)->ip_frag.header_offset; + tr->mtu = vnet_buffer(p0)->ip_frag.mtu; + tr->ipv6 = 1; + tr->n_fragments = vec_len(buffer); + tr->next = vnet_buffer(p0)->ip_frag.next_index; + } + + next0 = (error0 == IP_FRAG_ERROR_NONE) ? 
vnet_buffer(p0)->ip_frag.next_index : IP6_FRAG_NEXT_DROP; + frag_sent += vec_len(buffer); + small_packets += (vec_len(buffer) == 1); + + //Send fragments that were added in the frame + frag_from = buffer; + frag_left = vec_len(buffer); + while (frag_left > 0) { + while (frag_left > 0 && n_left_to_next > 0) { + u32 i; + i = to_next[0] = frag_from[0]; + frag_from += 1; + frag_left -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_get_buffer(vm, i)->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, i, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + } + vec_reset_length(buffer); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + vec_free(buffer); + vlib_node_increment_counter(vm, ip6_frag_node.index, IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent); + vlib_node_increment_counter(vm, ip6_frag_node.index, IP_FRAG_ERROR_SMALL_PACKET, small_packets); + + return frame->n_vectors; +} + +static char * ip4_frag_error_strings[] = { +#define _(sym,string) string, + foreach_ip_frag_error +#undef _ +}; + +VLIB_REGISTER_NODE (ip4_frag_node) = { + .function = ip4_frag, + .name = IP4_FRAG_NODE_NAME, + .vector_size = sizeof (u32), + .format_trace = format_ip_frag_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = IP_FRAG_N_ERROR, + .error_strings = ip4_frag_error_strings, + + .n_next_nodes = IP4_FRAG_N_NEXT, + .next_nodes = { + [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_FRAG_NEXT_DROP] = "error-drop" + }, +}; + +VLIB_REGISTER_NODE (ip6_frag_node) = { + .function = ip6_frag, + .name = IP6_FRAG_NODE_NAME, + .vector_size = sizeof (u32), + .format_trace = format_ip_frag_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = IP_FRAG_N_ERROR, + .error_strings = ip4_frag_error_strings, + + .n_next_nodes = IP6_FRAG_N_NEXT, + .next_nodes = { + [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP6_FRAG_NEXT_DROP] = "error-drop" + }, +}; diff --git a/vnet/vnet/ip/ip_frag.h b/vnet/vnet/ip/ip_frag.h new file mode 100644 index 00000000000..04566904e5f --- /dev/null +++ b/vnet/vnet/ip/ip_frag.h @@ -0,0 +1,81 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +/* + * IPv4 and IPv6 Fragmentation Nodes + * + * A packet sent to those nodes require the following + * buffer attributes to be set: + * ip_frag.header_offset : + * Where to find the IPv4 (or IPv6) header in the packet. Previous + * bytes are left untouched and copied in every fragment. The fragments + * are then appended. This option is used for fragmented packets + * that are encapsulated. 
+ * ip_frag.mtu : + * Maximum size of IP packets, header included, but ignoring + * the 'ip_frag.header_offset' copied bytes. + * ip_frag.next_index : + * One of ip_frag_next_t, indicating to which exit node the fragments + * should be sent to. + * + */ + +#ifndef IP_FRAG_H +#define IP_FRAG_H + +#include <vnet/vnet.h> + +#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header +#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header + +#define IP4_FRAG_NODE_NAME "ip4-frag" +#define IP6_FRAG_NODE_NAME "ip6-frag" + +vlib_node_registration_t ip4_frag_node; +vlib_node_registration_t ip6_frag_node; + +typedef enum { + IP4_FRAG_NEXT_IP4_LOOKUP, + IP4_FRAG_NEXT_IP6_LOOKUP, + IP4_FRAG_NEXT_DROP, + IP4_FRAG_N_NEXT +} ip4_frag_next_t; + +typedef enum { + IP6_FRAG_NEXT_IP4_LOOKUP, + IP6_FRAG_NEXT_IP6_LOOKUP, + IP6_FRAG_NEXT_DROP, + IP6_FRAG_N_NEXT +} ip6_frag_next_t; + +#define foreach_ip_frag_error \ + /* Must be first. */ \ + _(NONE, "packet fragmented") \ + _(SMALL_PACKET, "packet smaller than MTU") \ + _(FRAGMENT_SENT, "number of sent fragments") \ + _(CANT_FRAGMENT_HEADER, "can't fragment header'") \ + _(DONT_FRAGMENT_SET, "can't fragment this packet'") \ + _(MALFORMED, "malformed packet") \ + _(MEMORY, "could not allocate buffer") \ + _(UNKNOWN, "unknown error") + +typedef enum { +#define _(sym,str) IP_FRAG_ERROR_##sym, + foreach_ip_frag_error +#undef _ + IP_FRAG_N_ERROR, + } ip_frag_error_t; + +#endif /* ifndef IP_FRAG_H */ diff --git a/vnet/vnet/ip/ip_init.c b/vnet/vnet/ip/ip_init.c new file mode 100644 index 00000000000..0654daa7685 --- /dev/null +++ b/vnet/vnet/ip/ip_init.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip_init.c: ip generic initialization + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vnet/ip/ip.h> + +ip_main_t ip_main; + +clib_error_t * +ip_main_init (vlib_main_t * vm) +{ + ip_main_t * im = &ip_main; + clib_error_t * error = 0; + + memset (im, 0, sizeof (im[0])); + + { + ip_protocol_info_t * pi; + u32 i; + +#define ip_protocol(n,s) \ +do { \ + vec_add2 (im->protocol_infos, pi, 1); \ + pi->protocol = n; \ + pi->name = (u8 *) #s; \ +} while (0); + +#include "protocols.def" + +#undef ip_protocol + + im->protocol_info_by_name = hash_create_string (0, sizeof (uword)); + for (i = 0; i < vec_len (im->protocol_infos); i++) + { + pi = im->protocol_infos + i; + + hash_set_mem (im->protocol_info_by_name, pi->name, i); + hash_set (im->protocol_info_by_protocol, pi->protocol, i); + } + } + + { + tcp_udp_port_info_t * pi; + u32 i; + static char * port_names[] = + { +#define ip_port(s,n) #s, +#include "ports.def" +#undef ip_port + }; + static u16 ports[] = + { +#define ip_port(s,n) n, +#include "ports.def" +#undef ip_port + }; + + vec_resize (im->port_infos, ARRAY_LEN (port_names)); + im->port_info_by_name = hash_create_string (0, sizeof (uword)); + + for (i = 0; i < vec_len (im->port_infos); i++) + { + pi = im->port_infos + i; + pi->port = clib_host_to_net_u16 (ports[i]); + pi->name = (u8 *) port_names[i]; + hash_set_mem (im->port_info_by_name, pi->name, i); + hash_set (im->port_info_by_port, pi->port, i); + } + } + + if ((error = vlib_call_init_function (vm, vnet_main_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip4_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip6_init))) + return error; + + if ((error = vlib_call_init_function (vm, icmp4_init))) + return error; + + if ((error = vlib_call_init_function (vm, icmp6_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip4_hop_by_hop_init))) + return error; + +#if 0 + if ((error = vlib_call_init_function (vm, tcp_udp_lookup_init))) + return error; + +#endif + + if ((error = vlib_call_init_function (vm, udp_local_init))) + return error; + +#if 0 + if ((error = vlib_call_init_function (vm, tcp_init))) + return error; +#endif + + if ((error = vlib_call_init_function (vm, udp_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip_classify_init))) + return error; + + if ((error = vlib_call_init_function (vm, input_acl_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (ip_main_init); diff --git a/vnet/vnet/ip/ip_input_acl.c b/vnet/vnet/ip/ip_input_acl.c new file mode 100644 index 00000000000..75aa9ef818f --- /dev/null +++ b/vnet/vnet/ip/ip_input_acl.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/ip/ip.h> +#include <vnet/classify/vnet_classify.h> +#include <vnet/classify/input_acl.h> + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 table_index; + u32 offset; +} ip_inacl_trace_t; + +/* packet trace format function */ +static u8 * format_ip_inacl_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip_inacl_trace_t * t = va_arg (*args, ip_inacl_trace_t *); + + s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d", + t->sw_if_index, t->next_index, t->table_index, t->offset); + return s; +} + +vlib_node_registration_t ip4_inacl_node; +vlib_node_registration_t ip6_inacl_node; + +#define foreach_ip_inacl_error \ +_(MISS, "input ACL misses") \ +_(HIT, "input ACL hits") \ +_(CHAIN_HIT, "input ACL hits after chain walk") + +typedef enum { +#define _(sym,str) IP_INACL_ERROR_##sym, + foreach_ip_inacl_error +#undef _ + IP_INACL_N_ERROR, +} ip_inacl_error_t; + +static char * ip_inacl_error_strings[] = { +#define _(sym,string) string, + foreach_ip_inacl_error +#undef _ +}; + +static inline uword +ip_inacl_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_ip4) +{ + u32 n_left_from, * from, * to_next; + acl_next_index_t next_index; + input_acl_main_t * am = &input_acl_main; + vnet_classify_main_t * vcm = am->vnet_classify_main; + f64 now = vlib_time_now (vm); + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + input_acl_table_id_t tid; + vlib_node_runtime_t * error_node; + + if (is_ip4) + { + tid = INPUT_ACL_TABLE_IP4; + error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + } + else + { + tid = INPUT_ACL_TABLE_IP6; + error_node = vlib_node_get_runtime (vm, ip6_input_node.index); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + /* First pass: compute hashes */ + + while (n_left_from > 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, bi1; + u8 * h0, * h1; + u32 sw_if_index0, sw_if_index1; + u32 table_index0, table_index1; + vnet_classify_table_t * t0, * t1; + + /* prefetch next iteration */ + { + vlib_buffer_t * p1, * p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = b1->data; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + table_index1 = am->classify_table_index_by_sw_if_index[tid][sw_if_index1]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + vnet_buffer(b1)->l2_classify.hash = + vnet_classify_hash_packet (t1, (u8 *) h1); + + vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + + vnet_buffer(b1)->l2_classify.table_index = table_index1; + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t * b0; + 
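+      /* Same hash and table-index computation as the dual loop above,
+         one buffer at a time, for the 1-2 packets it left over. */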
u32 bi0; + u8 * h0; + u32 sw_if_index0; + u32 table_index0; + vnet_classify_table_t * t0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = ACL_NEXT_INDEX_DENY; + u32 table_index0; + vnet_classify_table_t * t0; + vnet_classify_entry_t * e0; + u64 hash0; + u8 * h0; + u8 error0; + + /* Stride 3 seems to work best */ + if (PREDICT_TRUE (n_left_from > 3)) + { + vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]); + vnet_classify_table_t * tp1; + u32 table_index1; + u64 phash1; + + table_index1 = vnet_buffer(p1)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index1 != ~0)) + { + tp1 = pool_elt_at_index (vcm->tables, table_index1); + phash1 = vnet_buffer(p1)->l2_classify.hash; + vnet_classify_prefetch_entry (tp1, phash1); + } + } + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + table_index0 = vnet_buffer(b0)->l2_classify.table_index; + e0 = 0; + t0 = 0; + + vnet_get_config_data (am->vnet_config_main[tid], + &vnet_buffer(b0)->ip.current_config_index, + &next0, + /* # bytes of config data */ 0); + + if (PREDICT_TRUE(table_index0 != ~0)) + { + hash0 = vnet_buffer(b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, + now); + if (e0) + { + vlib_buffer_advance (b0, e0->advance); + + next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)? + e0->next_index:next0; + + hits++; + + if (is_ip4) + error0 = (next0 == ACL_NEXT_INDEX_DENY)? + IP4_ERROR_INACL_SESSION_DENY:IP4_ERROR_NONE; + else + error0 = (next0 == ACL_NEXT_INDEX_DENY)? + IP6_ERROR_INACL_SESSION_DENY:IP6_ERROR_NONE; + b0->error = error_node->errors[error0]; + } + else + { + while (1) + { + if (PREDICT_TRUE(t0->next_table_index != ~0)) + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + else + { + next0 = (t0->miss_next_index < ACL_NEXT_INDEX_N_NEXT)? + t0->miss_next_index:next0; + + misses++; + + if (is_ip4) + error0 = (next0 == ACL_NEXT_INDEX_DENY)? + IP4_ERROR_INACL_TABLE_MISS:IP4_ERROR_NONE; + else + error0 = (next0 == ACL_NEXT_INDEX_DENY)? + IP6_ERROR_INACL_TABLE_MISS:IP6_ERROR_NONE; + b0->error = error_node->errors[error0]; + break; + } + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry + (t0, (u8 *) h0, hash0, now); + if (e0) + { + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)? + e0->next_index:next0; + hits++; + chain_hits++; + + if (is_ip4) + error0 = (next0 == ACL_NEXT_INDEX_DENY)? 
+ IP4_ERROR_INACL_SESSION_DENY:IP4_ERROR_NONE; + else + error0 = (next0 == ACL_NEXT_INDEX_DENY)? + IP6_ERROR_INACL_SESSION_DENY:IP6_ERROR_NONE; + b0->error = error_node->errors[error0]; + break; + } + } + } + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip_inacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + t->table_index = t0 ? t0 - vcm->tables : ~0; + t->offset = e0 ? vnet_classify_get_offset (t0, e0): ~0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + IP_INACL_ERROR_MISS, + misses); + vlib_node_increment_counter (vm, node->node_index, + IP_INACL_ERROR_HIT, + hits); + vlib_node_increment_counter (vm, node->node_index, + IP_INACL_ERROR_CHAIN_HIT, + chain_hits); + return frame->n_vectors; +} + +static uword +ip4_inacl (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip_inacl_inline (vm, node, frame, 1 /* is_ip4 */); +} + + +VLIB_REGISTER_NODE (ip4_inacl_node) = { + .function = ip4_inacl, + .name = "ip4-inacl", + .vector_size = sizeof (u32), + .format_trace = format_ip_inacl_trace, + .n_errors = ARRAY_LEN(ip_inacl_error_strings), + .error_strings = ip_inacl_error_strings, + + .n_next_nodes = ACL_NEXT_INDEX_N_NEXT, + .next_nodes = { + [ACL_NEXT_INDEX_DENY] = "error-drop", + }, +}; + +static uword +ip6_inacl (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip_inacl_inline (vm, node, frame, 0 /* is_ip4 */); +} + + +VLIB_REGISTER_NODE (ip6_inacl_node) = { + .function = ip6_inacl, + .name = "ip6-inacl", + .vector_size = sizeof (u32), + .format_trace = format_ip_inacl_trace, + .n_errors = ARRAY_LEN(ip_inacl_error_strings), + .error_strings = ip_inacl_error_strings, + + .n_next_nodes = ACL_NEXT_INDEX_N_NEXT, + .next_nodes = { + [ACL_NEXT_INDEX_DENY] = "error-drop", + }, +}; + +static clib_error_t * +ip_inacl_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (ip_inacl_init); + diff --git a/vnet/vnet/ip/ip_packet.h b/vnet/vnet/ip/ip_packet.h new file mode 100644 index 00000000000..fb9a23604e1 --- /dev/null +++ b/vnet/vnet/ip/ip_packet.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip_packet.h: packet format common between ip4 & ip6 + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_packet_h +#define included_ip_packet_h + +#include <vppinfra/byte_order.h> +#include <vppinfra/error.h> + +typedef enum ip_protocol { +#define ip_protocol(n,s) IP_PROTOCOL_##s = n, +#include "protocols.def" +#undef ip_protocol +} ip_protocol_t; + +/* TCP/UDP ports. */ +typedef enum { +#define ip_port(s,n) IP_PORT_##s = n, +#include "ports.def" +#undef ip_port +} ip_port_t; + +/* Classifies protocols into TCP, UDP, ICMP or other. */ +typedef enum { + IP_BUILTIN_PROTOCOL_UDP, + IP_BUILTIN_PROTOCOL_TCP, + IP_BUILTIN_PROTOCOL_ICMP, + IP_BUILTIN_PROTOCOL_UNKNOWN, +} ip_builtin_protocol_t; + +#define foreach_ip_builtin_multicast_group \ + _ (1, all_hosts_on_subnet) \ + _ (2, all_routers_on_subnet) \ + _ (4, dvmrp) \ + _ (5, ospf_all_routers) \ + _ (6, ospf_designated_routers) \ + _ (13, pim) \ + _ (18, vrrp) \ + _ (102, hsrp) \ + _ (22, igmp_v3) + +typedef enum { +#define _(n,f) IP_MULTICAST_GROUP_##f = n, + foreach_ip_builtin_multicast_group +#undef _ +} ip_multicast_group_t; + +/* IP checksum support. */ + +/* Incremental checksum update. */ +typedef uword ip_csum_t; + +always_inline ip_csum_t +ip_csum_with_carry (ip_csum_t sum, ip_csum_t x) +{ + ip_csum_t t = sum + x; + return t + (t < x); +} + +/* Update checksum changing field at even byte offset from x -> 0. */ +always_inline ip_csum_t +ip_csum_add_even (ip_csum_t c, ip_csum_t x) +{ + ip_csum_t d; + + d = c - x; + + /* Fold in carry from high bit. */ + d -= d > c; + + ASSERT (ip_csum_with_carry (d, x) == c); + + return d; +} + +/* Update checksum changing field at even byte offset from 0 -> x. */ +always_inline ip_csum_t +ip_csum_sub_even (ip_csum_t c, ip_csum_t x) +{ return ip_csum_with_carry (c, x); } + +always_inline ip_csum_t +ip_csum_update_inline (ip_csum_t sum, ip_csum_t old, ip_csum_t new, + u32 field_byte_offset, u32 field_n_bytes) +{ + /* For even 1-byte fields on big-endian and odd 1-byte fields on little endian + we need to shift byte into place for checksum. 
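+     This is the standard incremental-update technique (cf. RFC 1624):
+     subtract the old field value from the running one's-complement sum,
+     then add the new value, instead of re-summing the whole header.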
*/ + if ((field_n_bytes % 2) + && (field_byte_offset % 2) == CLIB_ARCH_IS_LITTLE_ENDIAN) + { + old = old << 8; + new = new << 8; + } + sum = ip_csum_sub_even (sum, old); + sum = ip_csum_add_even (sum, new); + return sum; +} + +#define ip_csum_update(sum,old,new,type,field) \ + ip_csum_update_inline ((sum), (old), (new), \ + STRUCT_OFFSET_OF (type, field), \ + STRUCT_SIZE_OF (type, field)) + +always_inline u16 ip_csum_fold (ip_csum_t c) +{ + /* Reduce to 16 bits. */ +#if uword_bits == 64 + c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32); + c = (c & 0xffff) + (c >> 16); +#endif + + c = (c & 0xffff) + (c >> 16); + c = (c & 0xffff) + (c >> 16); + + return c; +} + +/* Copy data and checksum at the same time. */ +ip_csum_t ip_csum_and_memcpy (ip_csum_t sum, void * dst, void * src, uword n_bytes); + +always_inline u16 +ip_csum_and_memcpy_fold (ip_csum_t sum, void * dst) +{ + uword n_zero; + ip_csum_t * dst_even; + + dst_even = uword_to_pointer + (pointer_to_uword (dst) &~ (sizeof (sum) - 1), + ip_csum_t *); + + if ((n_zero = dst - (void *) dst_even)) + { + u8 * d8 = dst; + uword i; + + for (i = 0; i < n_zero; i++) + d8[i] = 0; + + sum = ip_csum_with_carry (sum, dst_even[0]); + } + + return ip_csum_fold (sum); +} + +/* Checksum routine. */ +ip_csum_t ip_incremental_checksum (ip_csum_t sum, void * data, uword n_bytes); + +#endif /* included_ip_packet_h */ diff --git a/vnet/vnet/ip/lookup.c b/vnet/vnet/ip/lookup.c new file mode 100644 index 00000000000..80f0a33e731 --- /dev/null +++ b/vnet/vnet/ip/lookup.c @@ -0,0 +1,2271 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip_lookup.c: ip4/6 adjacency and lookup table managment + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vppinfra/math.h> /* for fabs */ +#include <vnet/ip/ip.h> + +static void +ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index); + +always_inline void +ip_poison_adjacencies (ip_adjacency_t * adj, uword n_adj) +{ + if (CLIB_DEBUG > 0) + memset (adj, 0xfe, n_adj * sizeof (adj[0])); +} + +/* Create new block of given number of contiguous adjacencies. */ +ip_adjacency_t * +ip_add_adjacency (ip_lookup_main_t * lm, + ip_adjacency_t * copy_adj, + u32 n_adj, + u32 * adj_index_return) +{ + ip_adjacency_t * adj; + u32 ai, i, handle; + + ai = heap_alloc (lm->adjacency_heap, n_adj, handle); + adj = heap_elt_at_index (lm->adjacency_heap, ai); + + ip_poison_adjacencies (adj, n_adj); + + /* Validate adjacency counters. */ + vlib_validate_combined_counter (&lm->adjacency_counters, ai + n_adj - 1); + + for (i = 0; i < n_adj; i++) + { + /* Make sure certain fields are always initialized. */ + adj[i].rewrite_header.sw_if_index = ~0; + adj[i].explicit_fib_index = ~0; + adj[i].mcast_group_index = ~0; + adj[i].classify_table_index = ~0; + adj[i].saved_lookup_next_index = 0; + + if (copy_adj) + adj[i] = copy_adj[i]; + + adj[i].heap_handle = handle; + adj[i].n_adj = n_adj; + + /* Zero possibly stale counters for re-used adjacencies. */ + vlib_zero_combined_counter (&lm->adjacency_counters, ai + i); + } + + *adj_index_return = ai; + return adj; +} + +static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_multipath_adjacency) +{ + ip_adjacency_t * adj; + uword handle; + + ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1); + + adj = ip_get_adjacency (lm, adj_index); + handle = adj->heap_handle; + + if (delete_multipath_adjacency) + ip_multipath_del_adjacency (lm, adj_index); + + ip_poison_adjacencies (adj, adj->n_adj); + + heap_dealloc (lm->adjacency_heap, handle); +} + +void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index) +{ ip_del_adjacency2 (lm, adj_index, /* delete_multipath_adjacency */ 1); } + +static int +next_hop_sort_by_weight (ip_multipath_next_hop_t * n1, + ip_multipath_next_hop_t * n2) +{ + int cmp = (int) n1->weight - (int) n2->weight; + return (cmp == 0 + ? (int) n1->next_hop_adj_index - (int) n2->next_hop_adj_index + : (cmp > 0 ? +1 : -1)); +} + +/* Given next hop vector is over-written with normalized one with sorted weights and + with weights corresponding to the number of adjacencies for each next hop. + Returns number of adjacencies in block. */ +static u32 ip_multipath_normalize_next_hops (ip_lookup_main_t * lm, + ip_multipath_next_hop_t * raw_next_hops, + ip_multipath_next_hop_t ** normalized_next_hops) +{ + ip_multipath_next_hop_t * nhs; + uword n_nhs, n_adj, n_adj_left, i; + f64 sum_weight, norm, error; + + n_nhs = vec_len (raw_next_hops); + ASSERT (n_nhs > 0); + if (n_nhs == 0) + return 0; + + /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */ + nhs = *normalized_next_hops; + vec_validate (nhs, 2*n_nhs - 1); + + /* Fast path: 1 next hop in block. */ + n_adj = n_nhs; + if (n_nhs == 1) + { + nhs[0] = raw_next_hops[0]; + nhs[0].weight = 1; + _vec_len (nhs) = 1; + goto done; + } + + else if (n_nhs == 2) + { + int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0; + + /* Fast sort. */ + nhs[0] = raw_next_hops[cmp]; + nhs[1] = raw_next_hops[cmp ^ 1]; + + /* Fast path: equal cost multipath with 2 next hops. 
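+       Equal weights need no normalization, so the power-of-two block
+       search below is skipped.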
*/ + if (nhs[0].weight == nhs[1].weight) + { + nhs[0].weight = nhs[1].weight = 1; + _vec_len (nhs) = 2; + goto done; + } + } + else + { + memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0])); + qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight); + } + + /* Find total weight to normalize weights. */ + sum_weight = 0; + for (i = 0; i < n_nhs; i++) + sum_weight += nhs[i].weight; + + /* In the unlikely case that all weights are given as 0, set them all to 1. */ + if (sum_weight == 0) + { + for (i = 0; i < n_nhs; i++) + nhs[i].weight = 1; + sum_weight = n_nhs; + } + + /* Save copies of all next hop weights to avoid being overwritten in loop below. */ + for (i = 0; i < n_nhs; i++) + nhs[n_nhs + i].weight = nhs[i].weight; + + /* Try larger and larger power of 2 sized adjacency blocks until we + find one where traffic flows to within 1% of specified weights. */ + for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2) + { + error = 0; + + norm = n_adj / sum_weight; + n_adj_left = n_adj; + for (i = 0; i < n_nhs; i++) + { + f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */ + word n = flt_round_nearest (nf); + + n = n > n_adj_left ? n_adj_left : n; + n_adj_left -= n; + error += fabs (nf - n); + nhs[i].weight = n; + } + + nhs[0].weight += n_adj_left; + + /* Less than 5% average error per adjacency with this size adjacency block? */ + if (error <= lm->multipath_next_hop_error_tolerance*n_adj) + { + /* Truncate any next hops with zero weight. */ + _vec_len (nhs) = i; + break; + } + } + + done: + /* Save vector for next call. */ + *normalized_next_hops = nhs; + return n_adj; +} + +always_inline uword +ip_next_hop_hash_key_from_handle (uword handle) +{ return 1 + 2*handle; } + +always_inline uword +ip_next_hop_hash_key_is_heap_handle (uword k) +{ return k & 1; } + +always_inline uword +ip_next_hop_hash_key_get_heap_handle (uword k) +{ + ASSERT (ip_next_hop_hash_key_is_heap_handle (k)); + return k / 2; +} + +static u32 +ip_multipath_adjacency_get (ip_lookup_main_t * lm, + ip_multipath_next_hop_t * raw_next_hops, + uword create_if_non_existent) +{ + uword * p; + u32 i, j, n_adj, adj_index, adj_heap_handle; + ip_adjacency_t * adj, * copy_adj; + ip_multipath_next_hop_t * nh, * nhs; + ip_multipath_adjacency_t * madj; + + n_adj = ip_multipath_normalize_next_hops (lm, raw_next_hops, &lm->next_hop_hash_lookup_key_normalized); + nhs = lm->next_hop_hash_lookup_key_normalized; + + /* Basic sanity. */ + ASSERT (n_adj >= vec_len (raw_next_hops)); + + /* Use normalized next hops to see if we've seen a block equivalent to this one before. */ + p = hash_get_mem (lm->multipath_adjacency_by_next_hops, nhs); + if (p) + return p[0]; + + if (! create_if_non_existent) + return 0; + + adj = ip_add_adjacency (lm, /* copy_adj */ 0, n_adj, &adj_index); + adj_heap_handle = adj[0].heap_handle; + + /* Fill in adjacencies in block based on corresponding next hop adjacencies. */ + i = 0; + vec_foreach (nh, nhs) + { + copy_adj = ip_get_adjacency (lm, nh->next_hop_adj_index); + for (j = 0; j < nh->weight; j++) + { + adj[i] = copy_adj[0]; + adj[i].heap_handle = adj_heap_handle; + adj[i].n_adj = n_adj; + i++; + } + } + + /* All adjacencies should have been initialized. */ + ASSERT (i == n_adj); + + vec_validate (lm->multipath_adjacencies, adj_heap_handle); + madj = vec_elt_at_index (lm->multipath_adjacencies, adj_heap_handle); + + madj->adj_index = adj_index; + madj->n_adj_in_block = n_adj; + madj->reference_count = 0; /* caller will set to one. 
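+     (ip_multipath_adjacency_add_del_next_hop() increments it once a route
+      references this block.)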
*/ + + madj->normalized_next_hops.count = vec_len (nhs); + madj->normalized_next_hops.heap_offset + = heap_alloc (lm->next_hop_heap, vec_len (nhs), + madj->normalized_next_hops.heap_handle); + memcpy (lm->next_hop_heap + madj->normalized_next_hops.heap_offset, + nhs, vec_bytes (nhs)); + + hash_set (lm->multipath_adjacency_by_next_hops, + ip_next_hop_hash_key_from_handle (madj->normalized_next_hops.heap_handle), + madj - lm->multipath_adjacencies); + + madj->unnormalized_next_hops.count = vec_len (raw_next_hops); + madj->unnormalized_next_hops.heap_offset + = heap_alloc (lm->next_hop_heap, vec_len (raw_next_hops), + madj->unnormalized_next_hops.heap_handle); + memcpy (lm->next_hop_heap + madj->unnormalized_next_hops.heap_offset, + raw_next_hops, vec_bytes (raw_next_hops)); + + ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); + + return adj_heap_handle; +} + +/* Returns 0 for next hop not found. */ +u32 +ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm, + u32 is_del, + u32 old_mp_adj_index, + u32 next_hop_adj_index, + u32 next_hop_weight, + u32 * new_mp_adj_index) +{ + ip_multipath_adjacency_t * mp_old, * mp_new; + ip_multipath_next_hop_t * nh, * nhs, * hash_nhs; + u32 n_nhs, i_nh; + + mp_new = mp_old = 0; + n_nhs = 0; + i_nh = 0; + nhs = 0; + + /* If old multipath adjacency is valid, find requested next hop. */ + if (old_mp_adj_index < vec_len (lm->multipath_adjacencies) + && lm->multipath_adjacencies[old_mp_adj_index].normalized_next_hops.count > 0) + { + mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); + + nhs = vec_elt_at_index (lm->next_hop_heap, mp_old->unnormalized_next_hops.heap_offset); + n_nhs = mp_old->unnormalized_next_hops.count; + + /* Linear search: ok since n_next_hops is small. */ + for (i_nh = 0; i_nh < n_nhs; i_nh++) + if (nhs[i_nh].next_hop_adj_index == next_hop_adj_index) + break; + + /* Given next hop not found. */ + if (i_nh >= n_nhs && is_del) + return 0; + } + + hash_nhs = lm->next_hop_hash_lookup_key; + if (hash_nhs) + _vec_len (hash_nhs) = 0; + + if (is_del) + { + if (n_nhs > 1) + { + /* Prepare lookup key for multipath with target next hop deleted. */ + if (i_nh > 0) + vec_add (hash_nhs, nhs + 0, i_nh); + if (i_nh + 1 < n_nhs) + vec_add (hash_nhs, nhs + i_nh + 1, n_nhs - (i_nh + 1)); + } + } + else /* it's an add. */ + { + /* If next hop is already there with the same weight, we have nothing to do. */ + if (i_nh < n_nhs && nhs[i_nh].weight == next_hop_weight) + { + new_mp_adj_index[0] = ~0; + goto done; + } + + /* Copy old next hops to lookup key vector. */ + if (n_nhs > 0) + vec_add (hash_nhs, nhs, n_nhs); + + if (i_nh < n_nhs) + { + /* Change weight of existing next hop. */ + nh = vec_elt_at_index (hash_nhs, i_nh); + } + else + { + /* Add a new next hop. */ + vec_add2 (hash_nhs, nh, 1); + nh->next_hop_adj_index = next_hop_adj_index; + } + + /* Set weight for added or old next hop. */ + nh->weight = next_hop_weight; + } + + if (vec_len (hash_nhs) > 0) + { + u32 tmp = ip_multipath_adjacency_get (lm, hash_nhs, + /* create_if_non_existent */ 1); + if (tmp != ~0) + mp_new = vec_elt_at_index (lm->multipath_adjacencies, tmp); + + /* Fetch again since pool may have moved. */ + if (mp_old) + mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); + } + + new_mp_adj_index[0] = mp_new ? 
mp_new - lm->multipath_adjacencies : ~0; + + if (mp_new != mp_old) + { + if (mp_old) + { + ASSERT (mp_old->reference_count > 0); + mp_old->reference_count -= 1; + } + if (mp_new) + mp_new->reference_count += 1; + } + + if (mp_old && mp_old->reference_count == 0) + ip_multipath_adjacency_free (lm, mp_old); + + done: + /* Save key vector next call. */ + lm->next_hop_hash_lookup_key = hash_nhs; + + return 1; +} + +static void +ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index) +{ + ip_adjacency_t * adj = ip_get_adjacency (lm, del_adj_index); + ip_multipath_adjacency_t * madj, * new_madj; + ip_multipath_next_hop_t * nhs, * hash_nhs; + u32 i, n_nhs, madj_index, new_madj_index; + + if (adj->heap_handle >= vec_len (lm->multipath_adjacencies)) + return; + + vec_validate (lm->adjacency_remap_table, vec_len (lm->adjacency_heap) - 1); + + for (madj_index = 0; madj_index < vec_len (lm->multipath_adjacencies); madj_index++) + { + madj = vec_elt_at_index (lm->multipath_adjacencies, madj_index); + if (madj->n_adj_in_block == 0) + continue; + + nhs = heap_elt_at_index (lm->next_hop_heap, madj->unnormalized_next_hops.heap_offset); + n_nhs = madj->unnormalized_next_hops.count; + for (i = 0; i < n_nhs; i++) + if (nhs[i].next_hop_adj_index == del_adj_index) + break; + + /* del_adj_index not found in unnormalized_next_hops? We're done. */ + if (i >= n_nhs) + continue; + + new_madj = 0; + if (n_nhs > 1) + { + hash_nhs = lm->next_hop_hash_lookup_key; + if (hash_nhs) + _vec_len (hash_nhs) = 0; + if (i > 0) + vec_add (hash_nhs, nhs + 0, i); + if (i + 1 < n_nhs) + vec_add (hash_nhs, nhs + i + 1, n_nhs - (i + 1)); + + new_madj_index = ip_multipath_adjacency_get (lm, hash_nhs, /* create_if_non_existent */ 1); + + lm->next_hop_hash_lookup_key = hash_nhs; + + if (new_madj_index == madj_index) + continue; + + new_madj = vec_elt_at_index (lm->multipath_adjacencies, new_madj_index); + } + + lm->adjacency_remap_table[madj->adj_index] = new_madj ? 
1 + new_madj->adj_index : ~0; + lm->n_adjacency_remaps += 1; + ip_multipath_adjacency_free (lm, madj); + } +} + +void +ip_multipath_adjacency_free (ip_lookup_main_t * lm, + ip_multipath_adjacency_t * a) +{ + hash_unset (lm->multipath_adjacency_by_next_hops, + ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle)); + heap_dealloc (lm->next_hop_heap, a->normalized_next_hops.heap_handle); + heap_dealloc (lm->next_hop_heap, a->unnormalized_next_hops.heap_handle); + + ip_del_adjacency2 (lm, a->adj_index, a->reference_count == 0); + memset (a, 0, sizeof (a[0])); +} + +always_inline ip_multipath_next_hop_t * +ip_next_hop_hash_key_get_next_hops (ip_lookup_main_t * lm, uword k, + uword * n_next_hops) +{ + ip_multipath_next_hop_t * nhs; + uword n_nhs; + if (ip_next_hop_hash_key_is_heap_handle (k)) + { + uword handle = ip_next_hop_hash_key_get_heap_handle (k); + nhs = heap_elt_with_handle (lm->next_hop_heap, handle); + n_nhs = heap_len (lm->next_hop_heap, handle); + } + else + { + nhs = uword_to_pointer (k, ip_multipath_next_hop_t *); + n_nhs = vec_len (nhs); + } + *n_next_hops = n_nhs; + return nhs; +} + +static uword +ip_next_hop_hash_key_sum (hash_t * h, uword key0) +{ + ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *); + ip_multipath_next_hop_t * k0; + uword n0; + + k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0); + return hash_memory (k0, n0 * sizeof (k0[0]), /* seed */ n0); +} + +static uword +ip_next_hop_hash_key_equal (hash_t * h, uword key0, uword key1) +{ + ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *); + ip_multipath_next_hop_t * k0, * k1; + uword n0, n1; + + k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0); + k1 = ip_next_hop_hash_key_get_next_hops (lm, key1, &n1); + + return n0 == n1 && ! memcmp (k0, k1, n0 * sizeof (k0[0])); +} + +clib_error_t * +ip_interface_address_add_del (ip_lookup_main_t * lm, + u32 sw_if_index, + void * addr_fib, + u32 address_length, + u32 is_del, + u32 * result_if_address_index) +{ + vnet_main_t * vnm = vnet_get_main(); + ip_interface_address_t * a, * prev, * next; + uword * p = mhash_get (&lm->address_to_if_address_index, addr_fib); + + vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, sw_if_index, ~0); + a = p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0; + + /* Verify given length. */ + if ((a && (address_length != a->address_length)) || (address_length == 0)) + { + vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH; + return clib_error_create + ( "%U wrong length (expected %d) for interface %U", + lm->format_address_and_length, addr_fib, + address_length, a? 
a->address_length : -1, + format_vnet_sw_if_index_name, vnm, sw_if_index); + } + + if (is_del) + { + if (!a) + { + vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, sw_if_index); + vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE; + return clib_error_create ("%U not found for interface %U", + lm->format_address_and_length, + addr_fib, address_length, + format_vnet_sw_interface_name, vnm, si); + } + + if (a->prev_this_sw_interface != ~0) + { + prev = pool_elt_at_index (lm->if_address_pool, a->prev_this_sw_interface); + prev->next_this_sw_interface = a->next_this_sw_interface; + } + if (a->next_this_sw_interface != ~0) + { + next = pool_elt_at_index (lm->if_address_pool, a->next_this_sw_interface); + next->prev_this_sw_interface = a->prev_this_sw_interface; + + if(a->prev_this_sw_interface == ~0) + lm->if_address_pool_index_by_sw_if_index[sw_if_index] = a->next_this_sw_interface; + } + + if ((a->next_this_sw_interface == ~0) && (a->prev_this_sw_interface == ~0)) + lm->if_address_pool_index_by_sw_if_index[sw_if_index] = ~0; + + mhash_unset (&lm->address_to_if_address_index, addr_fib, + /* old_value */ 0); + pool_put (lm->if_address_pool, a); + + if (result_if_address_index) + *result_if_address_index = ~0; + } + + else if (! a) + { + u32 pi; /* previous index */ + u32 ai; + u32 hi; /* head index */ + + pool_get (lm->if_address_pool, a); + memset (a, ~0, sizeof (a[0])); + ai = a - lm->if_address_pool; + + hi = pi = lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + prev = 0; + while (pi != (u32)~0) + { + prev = pool_elt_at_index(lm->if_address_pool, pi); + pi = prev->next_this_sw_interface; + } + pi = prev ? prev - lm->if_address_pool : (u32)~0; + + a->address_key = mhash_set (&lm->address_to_if_address_index, + addr_fib, ai, /* old_value */ 0); + a->address_length = address_length; + a->sw_if_index = sw_if_index; + a->flags = 0; + a->prev_this_sw_interface = pi; + a->next_this_sw_interface = ~0; + if (prev) + prev->next_this_sw_interface = ai; + + lm->if_address_pool_index_by_sw_if_index[sw_if_index] = + (hi != ~0) ? hi : ai; + if (result_if_address_index) + *result_if_address_index = ai; + } + else + { + if (result_if_address_index) + *result_if_address_index = a - lm->if_address_pool; + } + + + return /* no error */ 0; +} + +void serialize_vec_ip_adjacency (serialize_main_t * m, va_list * va) +{ + ip_adjacency_t * a = va_arg (*va, ip_adjacency_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) + { + serialize_integer (m, a[i].heap_handle, sizeof (a[i].heap_handle)); + serialize_integer (m, a[i].n_adj, sizeof (a[i].n_adj)); + serialize_integer (m, a[i].lookup_next_index, sizeof (a[i].lookup_next_index_as_int)); + switch (a[i].lookup_next_index) + { + case IP_LOOKUP_NEXT_LOCAL: + serialize_integer (m, a[i].if_address_index, sizeof (a[i].if_address_index)); + break; + + case IP_LOOKUP_NEXT_ARP: + serialize_integer (m, a[i].if_address_index, sizeof (a[i].if_address_index)); + serialize_integer (m, a[i].rewrite_header.sw_if_index, sizeof (a[i].rewrite_header.sw_if_index)); + break; + + case IP_LOOKUP_NEXT_REWRITE: + serialize (m, serialize_vnet_rewrite, &a[i].rewrite_header, sizeof (a[i].rewrite_data)); + break; + + default: + /* nothing else to serialize. 
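+           e.g. miss, drop and punt adjacencies are fully described by
+           their lookup_next_index alone.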
*/ + break; + } + } +} + +void unserialize_vec_ip_adjacency (serialize_main_t * m, va_list * va) +{ + ip_adjacency_t * a = va_arg (*va, ip_adjacency_t *); + u32 n = va_arg (*va, u32); + u32 i; + ip_poison_adjacencies (a, n); + for (i = 0; i < n; i++) + { + unserialize_integer (m, &a[i].heap_handle, sizeof (a[i].heap_handle)); + unserialize_integer (m, &a[i].n_adj, sizeof (a[i].n_adj)); + unserialize_integer (m, &a[i].lookup_next_index_as_int, sizeof (a[i].lookup_next_index_as_int)); + switch (a[i].lookup_next_index) + { + case IP_LOOKUP_NEXT_LOCAL: + unserialize_integer (m, &a[i].if_address_index, sizeof (a[i].if_address_index)); + break; + + case IP_LOOKUP_NEXT_ARP: + unserialize_integer (m, &a[i].if_address_index, sizeof (a[i].if_address_index)); + unserialize_integer (m, &a[i].rewrite_header.sw_if_index, sizeof (a[i].rewrite_header.sw_if_index)); + break; + + case IP_LOOKUP_NEXT_REWRITE: + unserialize (m, unserialize_vnet_rewrite, &a[i].rewrite_header, sizeof (a[i].rewrite_data)); + break; + + default: + /* nothing else to unserialize. */ + break; + } + } +} + +static void serialize_vec_ip_multipath_next_hop (serialize_main_t * m, va_list * va) +{ + ip_multipath_next_hop_t * nh = va_arg (*va, ip_multipath_next_hop_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) + { + serialize_integer (m, nh[i].next_hop_adj_index, sizeof (nh[i].next_hop_adj_index)); + serialize_integer (m, nh[i].weight, sizeof (nh[i].weight)); + } +} + +static void unserialize_vec_ip_multipath_next_hop (serialize_main_t * m, va_list * va) +{ + ip_multipath_next_hop_t * nh = va_arg (*va, ip_multipath_next_hop_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) + { + unserialize_integer (m, &nh[i].next_hop_adj_index, sizeof (nh[i].next_hop_adj_index)); + unserialize_integer (m, &nh[i].weight, sizeof (nh[i].weight)); + } +} + +static void serialize_vec_ip_multipath_adjacency (serialize_main_t * m, va_list * va) +{ + ip_multipath_adjacency_t * a = va_arg (*va, ip_multipath_adjacency_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) + { +#define foreach_ip_multipath_adjacency_field \ + _ (adj_index) _ (n_adj_in_block) _ (reference_count) \ + _ (normalized_next_hops.count) \ + _ (normalized_next_hops.heap_offset) \ + _ (normalized_next_hops.heap_handle) \ + _ (unnormalized_next_hops.count) \ + _ (unnormalized_next_hops.heap_offset) \ + _ (unnormalized_next_hops.heap_handle) + +#define _(f) serialize_integer (m, a[i].f, sizeof (a[i].f)); + foreach_ip_multipath_adjacency_field; +#undef _ + } +} + +static void unserialize_vec_ip_multipath_adjacency (serialize_main_t * m, va_list * va) +{ + ip_multipath_adjacency_t * a = va_arg (*va, ip_multipath_adjacency_t *); + u32 n = va_arg (*va, u32); + u32 i; + for (i = 0; i < n; i++) + { +#define _(f) unserialize_integer (m, &a[i].f, sizeof (a[i].f)); + foreach_ip_multipath_adjacency_field; +#undef _ + } +} + +void serialize_ip_lookup_main (serialize_main_t * m, va_list * va) +{ + ip_lookup_main_t * lm = va_arg (*va, ip_lookup_main_t *); + + /* If this isn't true you need to call e.g. ip4_maybe_remap_adjacencies + to make it true. */ + ASSERT (lm->n_adjacency_remaps == 0); + + serialize (m, serialize_heap, lm->adjacency_heap, serialize_vec_ip_adjacency); + + serialize (m, serialize_heap, lm->next_hop_heap, serialize_vec_ip_multipath_next_hop); + vec_serialize (m, lm->multipath_adjacencies, serialize_vec_ip_multipath_adjacency); + + /* Adjacency counters (FIXME disabled for now). 
*/ + if (0) + serialize (m, serialize_vlib_combined_counter_main, &lm->adjacency_counters, /* incremental */ 0); +} + +void unserialize_ip_lookup_main (serialize_main_t * m, va_list * va) +{ + ip_lookup_main_t * lm = va_arg (*va, ip_lookup_main_t *); + + unserialize (m, unserialize_heap, &lm->adjacency_heap, unserialize_vec_ip_adjacency); + unserialize (m, unserialize_heap, &lm->next_hop_heap, unserialize_vec_ip_multipath_next_hop); + vec_unserialize (m, &lm->multipath_adjacencies, unserialize_vec_ip_multipath_adjacency); + + /* Build hash table from unserialized data. */ + { + ip_multipath_adjacency_t * a; + + vec_foreach (a, lm->multipath_adjacencies) + { + if (a->n_adj_in_block > 0 && a->reference_count > 0) + hash_set (lm->multipath_adjacency_by_next_hops, + ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle), + a - lm->multipath_adjacencies); + } + } + + /* Validate adjacency counters. */ + vlib_validate_combined_counter (&lm->adjacency_counters, + vec_len (lm->adjacency_heap) - 1); + + /* Adjacency counters (FIXME disabled for now). */ + if (0) + unserialize (m, unserialize_vlib_combined_counter_main, &lm->adjacency_counters, /* incremental */ 0); +} + +void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) +{ + ip_adjacency_t * adj; + + /* Hand-craft special miss adjacency to use when nothing matches in the + routing table. Same for drop adjacency. */ + adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->miss_adj_index); + adj->lookup_next_index = IP_LOOKUP_NEXT_MISS; + ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX); + + adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->drop_adj_index); + adj->lookup_next_index = IP_LOOKUP_NEXT_DROP; + + adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->local_adj_index); + adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + adj->if_address_index = ~0; + + if (! lm->fib_result_n_bytes) + lm->fib_result_n_bytes = sizeof (uword); + + lm->multipath_adjacency_by_next_hops + = hash_create2 (/* elts */ 0, + /* user */ pointer_to_uword (lm), + /* value_bytes */ sizeof (uword), + ip_next_hop_hash_key_sum, + ip_next_hop_hash_key_equal, + /* format pair/arg */ + 0, 0); + + /* 1% max error tolerance for multipath. */ + lm->multipath_next_hop_error_tolerance = .01; + + lm->is_ip6 = is_ip6; + if (is_ip6) + { + lm->format_address_and_length = format_ip6_address_and_length; + mhash_init (&lm->address_to_if_address_index, sizeof (uword), + sizeof (ip6_address_fib_t)); + } + else + { + lm->format_address_and_length = format_ip4_address_and_length; + mhash_init (&lm->address_to_if_address_index, sizeof (uword), + sizeof (ip4_address_fib_t)); + } + + { + int i; + + /* Setup all IP protocols to be punted and builtin-unknown. */ + for (i = 0; i < 256; i++) + { + lm->local_next_by_ip_protocol[i] = IP_LOCAL_NEXT_PUNT; + lm->builtin_protocol_by_ip_protocol[i] = IP_BUILTIN_PROTOCOL_UNKNOWN; + } +#if 0 + /* Eliot's TCP doesn't actually work */ + lm->local_next_by_ip_protocol[IP_PROTOCOL_TCP] = IP_LOCAL_NEXT_TCP_LOOKUP; + lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_TCP] = + IP_BUILTIN_PROTOCOL_TCP; +#endif + + lm->local_next_by_ip_protocol[IP_PROTOCOL_UDP] = IP_LOCAL_NEXT_UDP_LOOKUP; + lm->local_next_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_LOCAL_NEXT_ICMP; + lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] = IP_BUILTIN_PROTOCOL_UDP; + lm->builtin_protocol_by_ip_protocol[is_ip6 ? 
IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_BUILTIN_PROTOCOL_ICMP; + } +} + +u8 * format_ip_flow_hash_config (u8 * s, va_list * args) +{ + u32 flow_hash_config = va_arg (*args, u32); + +#define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n); + foreach_flow_hash_bit; +#undef _ + + return s; +} + +u8 * format_ip_lookup_next (u8 * s, va_list * args) +{ + ip_lookup_next_t n = va_arg (*args, ip_lookup_next_t); + char * t = 0; + + switch (n) + { + default: + s = format (s, "unknown %d", n); + return s; + + case IP_LOOKUP_NEXT_MISS: t = "miss"; break; + case IP_LOOKUP_NEXT_DROP: t = "drop"; break; + case IP_LOOKUP_NEXT_PUNT: t = "punt"; break; + case IP_LOOKUP_NEXT_LOCAL: t = "local"; break; + case IP_LOOKUP_NEXT_ARP: t = "arp"; break; + case IP_LOOKUP_NEXT_CLASSIFY: t = "classify"; break; + case IP_LOOKUP_NEXT_MAP: t = "map"; break; + case IP_LOOKUP_NEXT_MAP_T: t = "map-t"; break; + case IP_LOOKUP_NEXT_SIXRD: t = "sixrd"; break; + case IP_LOOKUP_NEXT_REWRITE: + break; + } + + if (t) + vec_add (s, t, strlen (t)); + + return s; +} + +static u8 * format_ip_interface_address (u8 * s, va_list * args) +{ + ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); + u32 if_address_index = va_arg (*args, u32); + ip_interface_address_t * ia = pool_elt_at_index (lm->if_address_pool, if_address_index); + void * a = ip_interface_address_get_address (lm, ia); + + if (lm->is_ip6) + return format (s, "%U", format_ip6_address_and_length, a, ia->address_length); + else + return format (s, "%U", format_ip4_address_and_length, a, ia->address_length); +} + +u8 * format_ip_adjacency (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); + u32 adj_index = va_arg (*args, u32); + ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_REWRITE: + s = format (s, "%U", + format_vnet_rewrite, + vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data)); + break; + + default: + s = format (s, "%U", format_ip_lookup_next, adj->lookup_next_index); + if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP) + s = format (s, " %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index)); + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_LOCAL: + if (adj->if_address_index != ~0) + s = format (s, " %U", format_ip_interface_address, lm, adj->if_address_index); + break; + + case IP_LOOKUP_NEXT_CLASSIFY: + s = format (s, " table %d", adj->classify_table_index); + + default: + break; + } + break; + } + if (adj->explicit_fib_index != ~0 && adj->explicit_fib_index != 0) + s = format (s, " lookup fib index %d", adj->explicit_fib_index); + + return s; +} + +u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); + u32 adj_index = va_arg (*args, u32); + u8 * packet_data = va_arg (*args, u8 *); + u32 n_packet_data_bytes = va_arg (*args, u32); + ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_REWRITE: + s = format (s, "%U", + format_vnet_rewrite_header, + vnm->vlib_main, &adj->rewrite_header, packet_data, n_packet_data_bytes); + break; + + default: + break; + } + + return s; +} + +static uword unformat_ip_lookup_next (unformat_input_t * input, va_list * args) +{ + ip_lookup_next_t * result = va_arg (*args, 
ip_lookup_next_t *); + ip_lookup_next_t n; + + if (unformat (input, "drop")) + n = IP_LOOKUP_NEXT_DROP; + + else if (unformat (input, "punt")) + n = IP_LOOKUP_NEXT_PUNT; + + else if (unformat (input, "local")) + n = IP_LOOKUP_NEXT_LOCAL; + + else if (unformat (input, "arp")) + n = IP_LOOKUP_NEXT_ARP; + + else if (unformat (input, "classify")) + n = IP_LOOKUP_NEXT_CLASSIFY; + + else + return 0; + + *result = n; + return 1; +} + +static uword unformat_ip_adjacency (unformat_input_t * input, va_list * args) +{ + vlib_main_t * vm = va_arg (*args, vlib_main_t *); + ip_adjacency_t * adj = va_arg (*args, ip_adjacency_t *); + u32 node_index = va_arg (*args, u32); + vnet_main_t * vnm = vnet_get_main(); + u32 sw_if_index, is_ip6; + ip46_address_t a46; + ip_lookup_next_t next; + + is_ip6 = node_index == ip6_rewrite_node.index; + adj->rewrite_header.node_index = node_index; + adj->explicit_fib_index = ~0; + + if (unformat (input, "arp %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip46_address, &a46, is_ip6)) + { + ip_lookup_main_t * lm = is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main; + ip_adjacency_t * a_adj; + u32 adj_index; + + if (is_ip6) + adj_index = ip6_fib_lookup (&ip6_main, sw_if_index, &a46.ip6); + else + adj_index = ip4_fib_lookup (&ip4_main, sw_if_index, &a46.ip4); + + a_adj = ip_get_adjacency (lm, adj_index); + + if (a_adj->rewrite_header.sw_if_index != sw_if_index) + return 0; + + if (is_ip6) + ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index); + else + ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index); + } + + else if (unformat_user (input, unformat_ip_lookup_next, &next)) + { + adj->lookup_next_index = next; + adj->if_address_index = ~0; + if (next == IP_LOOKUP_NEXT_LOCAL) + (void) unformat (input, "%d", &adj->if_address_index); + else if (next == IP_LOOKUP_NEXT_CLASSIFY) + if (!unformat (input, "%d", &adj->classify_table_index)) + { + clib_warning ("classify adj must specify table index"); + return 0; + } + } + + else if (unformat_user (input, + unformat_vnet_rewrite, + vm, &adj->rewrite_header, sizeof (adj->rewrite_data))) + adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + else + return 0; + + return 1; +} + +clib_error_t * +vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 table_id, is_del; + u32 weight, * weights = 0; + u32 * table_ids = 0; + u32 sw_if_index, * sw_if_indices = 0; + ip4_address_t ip4_addr, * ip4_dst_addresses = 0, * ip4_via_next_hops = 0; + ip6_address_t ip6_addr, * ip6_dst_addresses = 0, * ip6_via_next_hops = 0; + u32 dst_address_length, * dst_address_lengths = 0; + ip_adjacency_t parse_adj, * add_adj = 0; + unformat_input_t _line_input, * line_input = &_line_input; + f64 count; + u32 outer_table_id; + + is_del = 0; + table_id = 0; + count = 1; + + /* Get a line of input. */ + if (! 
unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + memset(&parse_adj, 0, sizeof (parse_adj)); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "table %d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "count %f", &count)) + ; + + else if (unformat (line_input, "%U/%d", + unformat_ip4_address, &ip4_addr, + &dst_address_length)) + { + vec_add1 (ip4_dst_addresses, ip4_addr); + vec_add1 (dst_address_lengths, dst_address_length); + } + + else if (unformat (line_input, "%U/%d", + unformat_ip6_address, &ip6_addr, + &dst_address_length)) + { + vec_add1 (ip6_dst_addresses, ip6_addr); + vec_add1 (dst_address_lengths, dst_address_length); + } + + else if (unformat (line_input, "via %U %U weight %u", + unformat_ip4_address, &ip4_addr, + unformat_vnet_sw_interface, vnm, &sw_if_index, + &weight)) + { + vec_add1 (ip4_via_next_hops, ip4_addr); + vec_add1 (sw_if_indices, sw_if_index); + vec_add1 (weights, weight); + vec_add1 (table_ids, (u32)~0); + } + + else if (unformat (line_input, "via %U %U weight %u", + unformat_ip6_address, &ip6_addr, + unformat_vnet_sw_interface, vnm, &sw_if_index, + &weight)) + { + vec_add1 (ip6_via_next_hops, ip6_addr); + vec_add1 (sw_if_indices, sw_if_index); + vec_add1 (weights, weight); + vec_add1 (table_ids, (u32)~0); + } + + else if (unformat (line_input, "via %U %U", + unformat_ip4_address, &ip4_addr, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + vec_add1 (ip4_via_next_hops, ip4_addr); + vec_add1 (sw_if_indices, sw_if_index); + vec_add1 (weights, 1); + vec_add1 (table_ids, (u32)~0); + } + + else if (unformat (line_input, "via %U %U", + unformat_ip6_address, &ip6_addr, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + vec_add1 (ip6_via_next_hops, ip6_addr); + vec_add1 (sw_if_indices, sw_if_index); + vec_add1 (weights, 1); + vec_add1 (table_ids, (u32)~0); + } + else if (unformat (line_input, "via %U", + unformat_ip4_address, &ip4_addr)) + { + vec_add1 (ip4_via_next_hops, ip4_addr); + vec_add1 (sw_if_indices, (u32)~0); + vec_add1 (weights, 1); + vec_add1 (table_ids, table_id); + } + else if (unformat (line_input, "via %U", + unformat_ip6_address, &ip6_addr)) + { + vec_add1 (ip6_via_next_hops, ip6_addr); + vec_add1 (sw_if_indices, (u32)~0); + vec_add1 (weights, 1); + vec_add1 (table_ids, (u32)table_id); + } + + else if (vec_len (ip4_dst_addresses) > 0 + && unformat (line_input, "via %U", + unformat_ip_adjacency, vm, &parse_adj, ip4_rewrite_node.index)) + vec_add1 (add_adj, parse_adj); + + else if (vec_len (ip6_dst_addresses) > 0 + && unformat (line_input, "via %U", + unformat_ip_adjacency, vm, &parse_adj, ip6_rewrite_node.index)) + vec_add1 (add_adj, parse_adj); + else if (unformat (line_input, "lookup in table %d", &outer_table_id)) + { + uword * p; + + if (vec_len (ip4_dst_addresses) > 0) + p = hash_get (ip4_main.fib_index_by_table_id, outer_table_id); + else + p = hash_get (ip6_main.fib_index_by_table_id, outer_table_id); + + if (p == 0) + { + error = clib_error_return (0, "Nonexistent outer table id %d", + outer_table_id); + goto done; + } + + parse_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + parse_adj.explicit_fib_index = p[0]; + vec_add1 (add_adj, parse_adj); + } + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + unformat_free (line_input); + + if (vec_len (ip4_dst_addresses) + vec_len (ip6_dst_addresses) == 0) + { + error = 
clib_error_return (0, "expected ip4/ip6 destination address/length."); + goto done; + } + + if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_dst_addresses) > 0) + { + error = clib_error_return (0, "mixed ip4/ip6 address/length."); + goto done; + } + + if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_via_next_hops) > 0) + { + error = clib_error_return (0, "ip4 destinations with ip6 next hops."); + goto done; + } + + if (vec_len (ip6_dst_addresses) > 0 && vec_len (ip4_via_next_hops) > 0) + { + error = clib_error_return (0, "ip6 destinations with ip4 next hops."); + goto done; + } + + if (! is_del && vec_len (add_adj) + vec_len (weights) == 0) + { + error = clib_error_return (0, "no next hops or adjacencies to add."); + goto done; + } + + if (vec_len(ip4_via_next_hops)) + { + if (sw_if_indices[0] == (u32)~0) + { + u32 ai; + uword * p; + u32 fib_index; + ip_adjacency_t *nh_adj; + + p = hash_get (ip4_main.fib_index_by_table_id, table_ids[0]); + if (p == 0) + { + error = clib_error_return (0, "Nonexistent FIB id %d", + table_ids[0]); + goto done; + } + + fib_index = p[0]; + + ai = ip4_fib_lookup_with_table (&ip4_main, + fib_index, + ip4_via_next_hops, + 1 /* disable default route */); + if (ai == 0) + { + error = clib_error_return (0, "next hop %U not in FIB", + format_ip4_address, + ip4_via_next_hops); + goto done; + } + nh_adj = ip_get_adjacency (&ip4_main.lookup_main, ai); + vec_add1 (add_adj, nh_adj[0]); + } + } + if (vec_len(ip6_via_next_hops)) + { + if (sw_if_indices[0] == (u32)~0) + { + u32 ai; + uword * p; + u32 fib_index; + ip_adjacency_t *nh_adj; + + p = hash_get (ip6_main.fib_index_by_table_id, table_ids[0]); + if (p == 0) + { + error = clib_error_return (0, "Nonexistent FIB id %d", + table_ids[0]); + goto done; + } + + fib_index = p[0]; + ai = ip6_fib_lookup_with_table (&ip6_main, + fib_index, + ip6_via_next_hops); + if (ai == 0) + { + error = clib_error_return (0, "next hop %U not in FIB", + format_ip6_address, + ip6_via_next_hops); + goto done; + } + nh_adj = ip_get_adjacency (&ip6_main.lookup_main, ai); + vec_add1 (add_adj, nh_adj[0]); + } + } + + { + int i; + ip4_main_t * im4 = &ip4_main; + ip6_main_t * im6 = &ip6_main; + + for (i = 0; i < vec_len (ip4_dst_addresses); i++) + { + ip4_add_del_route_args_t a; + + memset (&a, 0, sizeof (a)); + a.flags = IP4_ROUTE_FLAG_TABLE_ID; + a.table_index_or_table_id = table_id; + a.dst_address = ip4_dst_addresses[i]; + a.dst_address_length = dst_address_lengths[i]; + a.adj_index = ~0; + + if (is_del) + { + if (vec_len (ip4_via_next_hops) == 0) + { + uword * dst_hash, * dst_result; + u32 dst_address_u32; + ip4_fib_t * fib; + + fib = find_ip4_fib_by_table_index_or_id (im4, table_id, + 0 /* by table id */); + + a.flags |= IP4_ROUTE_FLAG_DEL; + dst_address_u32 = a.dst_address.as_u32 + & im4->fib_masks[a.dst_address_length]; + + dst_hash = + fib->adj_index_by_dst_address[a.dst_address_length]; + dst_result = hash_get (dst_hash, dst_address_u32); + if (dst_result) + a.adj_index = dst_result[0]; + else + { + clib_warning ("%U/%d not in FIB", + format_ip4_address, &a.dst_address, + a.dst_address_length); + continue; + } + + ip4_add_del_route (im4, &a); + ip4_maybe_remap_adjacencies (im4, table_id, + IP4_ROUTE_FLAG_TABLE_ID); + } + else + { + u32 i, j, n, f, incr; + ip4_address_t dst = a.dst_address; + f64 t[2]; + n = count; + t[0] = vlib_time_now (vm); + incr = 1<<(32 - a.dst_address_length); + for (i = 0; i < n; i++) + { + f = i + 1 < n ? 
IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0; + a.dst_address = dst; + for (j = 0; j < vec_len (ip4_via_next_hops); j++) + { + if (table_ids[j] != (u32)~0) + { + uword * p = hash_get (im4->fib_index_by_table_id, + table_ids[j]); + if (p == 0) + { + clib_warning ("no such FIB table %d", + table_ids[j]); + continue; + } + table_ids[j] = p[0]; + } + + ip4_add_del_route_next_hop (im4, + IP4_ROUTE_FLAG_DEL | f, + &a.dst_address, + a.dst_address_length, + &ip4_via_next_hops[j], + sw_if_indices[j], + weights[j], (u32)~0, + table_ids[j] /* fib index */); + } + dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32)); + } + t[1] = vlib_time_now (vm); + if (count > 1) + vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); + } + } + else + { + if (vec_len (add_adj) > 0) + { + a.flags |= IP4_ROUTE_FLAG_ADD; + a.add_adj = add_adj; + a.n_add_adj = vec_len (add_adj); + + ip4_add_del_route (im4, &a); + } + else if (vec_len (ip4_via_next_hops) > 0) + { + u32 i, j, n, f, incr; + ip4_address_t dst = a.dst_address; + f64 t[2]; + n = count; + t[0] = vlib_time_now (vm); + incr = 1<<(32 - a.dst_address_length); + for (i = 0; i < n; i++) + { + f = i + 1 < n ? IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0; + a.dst_address = dst; + for (j = 0; j < vec_len (ip4_via_next_hops); j++) + { + if (table_ids[j] != (u32)~0) + { + uword * p = hash_get (im4->fib_index_by_table_id, + table_ids[j]); + if (p == 0) + { + clib_warning ("no such FIB table %d", + table_ids[j]); + continue; + } + table_ids[j] = p[0]; + } + ip4_add_del_route_next_hop (im4, + IP4_ROUTE_FLAG_ADD | f, + &a.dst_address, + a.dst_address_length, + &ip4_via_next_hops[j], + sw_if_indices[j], + weights[j], (u32)~0, + table_ids[j] /* fib index */); + } + dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32)); + } + t[1] = vlib_time_now (vm); + if (count > 1) + vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); + } + } + } + + for (i = 0; i < vec_len (ip6_dst_addresses); i++) + { + ip6_add_del_route_args_t a; + + + memset (&a, 0, sizeof (a)); + a.flags = IP6_ROUTE_FLAG_TABLE_ID; + a.table_index_or_table_id = table_id; + a.dst_address = ip6_dst_addresses[i]; + a.dst_address_length = dst_address_lengths[i]; + a.adj_index = ~0; + + if (is_del) + { + if (vec_len (ip6_via_next_hops) == 0) + { + BVT(clib_bihash_kv) kv, value; + ip6_address_t dst_address; + ip6_fib_t * fib; + + fib = find_ip6_fib_by_table_index_or_id (im6, table_id, + 0 /* by table id */); + + a.flags |= IP4_ROUTE_FLAG_DEL; + + dst_address = ip6_dst_addresses[i]; + + ip6_address_mask (&dst_address, + &im6->fib_masks[dst_address_length]); + + kv.key[0] = dst_address.as_u64[0]; + kv.key[1] = dst_address.as_u64[1]; + kv.key[2] = ((u64)(fib - im6->fibs)<<32) + | a.dst_address_length; + + if (BV(clib_bihash_search)(&im6->ip6_lookup_table, + &kv, &value) == 0) + a.adj_index = value.value; + else + { + clib_warning ("%U/%d not in FIB", + format_ip6_address, &a.dst_address, + a.dst_address_length); + continue; + } + + a.flags |= IP6_ROUTE_FLAG_DEL; + ip6_add_del_route (im6, &a); + ip6_maybe_remap_adjacencies (im6, table_id, + IP6_ROUTE_FLAG_TABLE_ID); + } + else + { + u32 i; + for (i = 0; i < vec_len (ip6_via_next_hops); i++) + { + ip6_add_del_route_next_hop (im6, + IP6_ROUTE_FLAG_DEL, + &a.dst_address, + a.dst_address_length, + &ip6_via_next_hops[i], + sw_if_indices[i], + weights[i], (u32)~0, + table_ids[i] /* fib index */); + } + } + } + else + { + if (vec_len (add_adj) > 0) + { + a.flags |= IP6_ROUTE_FLAG_ADD; + a.add_adj = add_adj; + a.n_add_adj = vec_len 
(add_adj); + + ip6_add_del_route (im6, &a); + } + else if (vec_len (ip6_via_next_hops) > 0) + { + u32 i; + for (i = 0; i < vec_len (ip6_via_next_hops); i++) + { + ip6_add_del_route_next_hop (im6, + IP6_ROUTE_FLAG_ADD, + &a.dst_address, + a.dst_address_length, + &ip6_via_next_hops[i], + sw_if_indices[i], + weights[i], (u32)~0, + table_ids[i]); + } + } + } + } + } + + done: + vec_free (add_adj); + vec_free (weights); + vec_free (dst_address_lengths); + vec_free (ip4_dst_addresses); + vec_free (ip6_dst_addresses); + vec_free (ip4_via_next_hops); + vec_free (ip6_via_next_hops); + return error; +} + +VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = { + .path = "ip", + .short_help = "Internet protocol (IP) commands", +}; + +VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = { + .path = "show ip", + .short_help = "Internet protocol (IP) show commands", +}; + +VLIB_CLI_COMMAND (vlib_cli_show_ip4_command, static) = { + .path = "show ip4", + .short_help = "Internet protocol version 4 (IP4) show commands", +}; + +VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = { + .path = "show ip6", + .short_help = "Internet protocol version 6 (IP6) show commands", +}; + +VLIB_CLI_COMMAND (ip_route_command, static) = { + .path = "ip route", + .short_help = "Add/delete IP routes", + .function = vnet_ip_route_cmd, +}; + +/* + * The next two routines address a longstanding script hemorrhoid. + * Probing a v4 or v6 neighbor needs to appear to be synchronous, + * or dependent route-adds will simply fail. + */ +static clib_error_t * +ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index, + int retry_count) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * e; + int i; + int resolved = 0; + uword event_type; + uword *event_data = 0; + + ASSERT (vlib_in_process_context(vm)); + + if (retry_count > 0) + vnet_register_ip6_neighbor_resolution_event + (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index, + 1 /* event */, 0 /* data */); + + for (i = 0; i < retry_count; i++) + { + /* The interface may be down, etc. */ + e = ip6_probe_neighbor (vm, a, sw_if_index); + + if (e) + return e; + + vlib_process_wait_for_event_or_clock (vm, 1.0); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case 1: /* resolved... */ + vlib_cli_output (vm, "Resolved %U", + format_ip6_address, a); + resolved = 1; + goto done; + + case ~0: /* timeout */ + break; + + default: + clib_warning ("unknown event_type %d", event_type); + } + } + + done: + vec_reset_length (event_data); + + if (!resolved) + return clib_error_return (0, "Resolution failed for %U", + format_ip6_address, a); + return 0; +} + +static clib_error_t * +ip4_probe_neighbor_wait (vlib_main_t *vm, ip4_address_t * a, u32 sw_if_index, + int retry_count) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * e; + int i; + int resolved = 0; + uword event_type; + uword *event_data = 0; + + ASSERT (vlib_in_process_context(vm)); + + if (retry_count > 0) + vnet_register_ip4_arp_resolution_event + (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index, + 1 /* event */, 0 /* data */); + + for (i = 0; i < retry_count; i++) + { + /* The interface may be down, etc. */ + e = ip4_probe_neighbor (vm, a, sw_if_index); + + if (e) + return e; + + vlib_process_wait_for_event_or_clock (vm, 1.0); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case 1: /* resolved... 
*/ + vlib_cli_output (vm, "Resolved %U", + format_ip4_address, a); + resolved = 1; + goto done; + + case ~0: /* timeout */ + break; + + default: + clib_warning ("unknown event_type %d", event_type); + } + } + + done: + + vec_reset_length (event_data); + + if (!resolved) + return clib_error_return (0, "Resolution failed for %U", + format_ip4_address, a); + return 0; +} + +static clib_error_t * +probe_neighbor_address (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t a4; + ip6_address_t a6; + clib_error_t * error = 0; + u32 sw_if_index = ~0; + int retry_count = 3; + int is_ip4 = 1; + int address_set = 0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat_user (line_input, unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else if (unformat (line_input, "retry %d", &retry_count)) + ; + + else if (unformat (line_input, "%U", unformat_ip4_address, &a4)) + address_set++; + else if (unformat (line_input, "%U", unformat_ip6_address, &a6)) + { + address_set++; + is_ip4 = 0; + } + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (sw_if_index == ~0) + return clib_error_return (0, "Interface required, not set."); + if (address_set == 0) + return clib_error_return (0, "ip address required, not set."); + if (address_set > 1) + return clib_error_return (0, "Multiple ip addresses not supported."); + + if (is_ip4) + error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count); + else + error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count); + + return error; +} + +VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = { + .path = "ip probe-neighbor", + .function = probe_neighbor_address, + .short_help = "ip probe-neighbor <intfc> <ip4-addr> | <ip6-addr> [retry nn]", +}; + +typedef CLIB_PACKED (struct { + ip4_address_t address; + + u32 address_length : 6; + + u32 index : 26; +}) ip4_route_t; + +static clib_error_t * +ip4_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im4 = &ip4_main; + ip4_route_t * routes, * r; + ip4_fib_t * fib; + ip_lookup_main_t * lm = &im4->lookup_main; + uword * results, i; + int verbose, matching, mtrie, include_empty_fibs; + ip4_address_t matching_address; + u8 clear = 0; + int table_id = -1; + + routes = 0; + results = 0; + verbose = 1; + include_empty_fibs = 0; + matching = 0; + mtrie = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "brief") || unformat (input, "summary") + || unformat (input, "sum")) + verbose = 0; + + else if (unformat (input, "mtrie")) + mtrie = 1; + + else if (unformat (input, "include-empty")) + include_empty_fibs = 1; + + else if (unformat (input, "%U", unformat_ip4_address, &matching_address)) + matching = 1; + + else if (unformat (input, "clear")) + clear = 1; + + else if (unformat (input, "table %d", &table_id)) + ; + else + break; + } + + vec_foreach (fib, im4->fibs) + { + int fib_not_empty; + + fib_not_empty = 0; + for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) + { + uword * hash = fib->adj_index_by_dst_address[i]; + uword n_elts = hash_elts (hash); + if (n_elts) + { + fib_not_empty = 1; + break; + } + } 
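+ /* Skip FIBs that contain no routes unless "include-empty" was requested on the CLI. */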
+ + if (fib_not_empty == 0 && include_empty_fibs == 0) + continue; + + if (table_id >= 0 && table_id != (int)fib->table_id) + continue; + + if (include_empty_fibs) + vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", + fib->table_id, fib - im4->fibs, + format_ip_flow_hash_config, fib->flow_hash_config); + + /* Show summary? */ + if (! verbose) + { + if (include_empty_fibs == 0) + vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", + fib->table_id, fib - im4->fibs, + format_ip_flow_hash_config, fib->flow_hash_config); + vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); + for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) + { + uword * hash = fib->adj_index_by_dst_address[i]; + uword n_elts = hash_elts (hash); + if (n_elts > 0) + vlib_cli_output (vm, "%20d%16d", i, n_elts); + } + continue; + } + + if (routes) + _vec_len (routes) = 0; + if (results) + _vec_len (results) = 0; + + for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) + { + uword * hash = fib->adj_index_by_dst_address[i]; + hash_pair_t * p; + ip4_route_t x; + + x.address_length = i; + + if (matching) + { + x.address.as_u32 = matching_address.as_u32 & im4->fib_masks[i]; + p = hash_get_pair (hash, x.address.as_u32); + if (p) + { + if (lm->fib_result_n_words > 1) + { + x.index = vec_len (results); + vec_add (results, p->value, lm->fib_result_n_words); + } + else + x.index = p->value[0]; + vec_add1 (routes, x); + } + } + else + { + hash_foreach_pair (p, hash, ({ + x.address.data_u32 = p->key; + if (lm->fib_result_n_words > 1) + { + x.index = vec_len (results); + vec_add (results, p->value, lm->fib_result_n_words); + } + else + x.index = p->value[0]; + + vec_add1 (routes, x); + })); + } + } + + vec_sort (routes, r1, r2, + ({ int cmp = ip4_address_compare (&r1->address, &r2->address); + cmp ? 
cmp : ((int) r1->address_length - (int) r2->address_length); })); + if (vec_len(routes)) { + if (include_empty_fibs == 0) + vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", + fib->table_id, fib - im4->fibs, + format_ip_flow_hash_config, fib->flow_hash_config); + if (mtrie) + vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); + vlib_cli_output (vm, "%=20s%=16s%=16s%=16s", + "Destination", "Packets", "Bytes", "Adjacency"); + } + vec_foreach (r, routes) + { + vlib_counter_t c, sum; + uword i, j, n_left, n_nhs, adj_index, * result = 0; + ip_adjacency_t * adj; + ip_multipath_next_hop_t * nhs, tmp_nhs[1]; + + adj_index = r->index; + if (lm->fib_result_n_words > 1) + { + result = vec_elt_at_index (results, adj_index); + adj_index = result[0]; + } + + adj = ip_get_adjacency (lm, adj_index); + if (adj->n_adj == 1) + { + nhs = &tmp_nhs[0]; + nhs[0].next_hop_adj_index = ~0; /* not used */ + nhs[0].weight = 1; + n_nhs = 1; + } + else + { + ip_multipath_adjacency_t * madj; + madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle); + nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset); + n_nhs = madj->normalized_next_hops.count; + } + + n_left = nhs[0].weight; + vlib_counter_zero (&sum); + for (i = j = 0; i < adj->n_adj; i++) + { + n_left -= 1; + vlib_get_combined_counter (&lm->adjacency_counters, + adj_index + i, &c); + if (clear) + vlib_zero_combined_counter (&lm->adjacency_counters, + adj_index + i); + vlib_counter_add (&sum, &c); + if (n_left == 0) + { + u8 * msg = 0; + uword indent; + + if (j == 0) + msg = format (msg, "%-20U", + format_ip4_address_and_length, + r->address.data, r->address_length); + else + msg = format (msg, "%U", format_white_space, 20); + + msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes); + + indent = vec_len (msg); + msg = format (msg, "weight %d, index %d\n%U%U", + nhs[j].weight, adj_index + i, + format_white_space, indent, + format_ip_adjacency, + vnm, lm, adj_index + i); + + vlib_cli_output (vm, "%v", msg); + vec_free (msg); + + if (result && lm->format_fib_result) + vlib_cli_output (vm, "%20s%U", "", + lm->format_fib_result, vm, lm, result, + i + 1 - nhs[j].weight, + nhs[j].weight); + + j++; + if (j < n_nhs) + { + n_left = nhs[j].weight; + vlib_counter_zero (&sum); + } + } + } + } + } + + vec_free (routes); + vec_free (results); + + return 0; +} + +VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { + .path = "show ip fib", + .short_help = "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>] [clear] [include-empty]", + .function = ip4_show_fib, +}; + +typedef struct { + ip6_address_t address; + + u32 address_length; + + u32 index; +} ip6_route_t; + +typedef struct { + u32 fib_index; + ip6_route_t ** routep; +} add_routes_in_fib_arg_t; + +static void add_routes_in_fib (BVT(clib_bihash_kv) * kvp, void *arg) +{ + add_routes_in_fib_arg_t * ap = arg; + + if (kvp->key[2]>>32 == ap->fib_index) + { + ip6_address_t *addr; + ip6_route_t * r; + addr = (ip6_address_t *) kvp; + vec_add2 (*ap->routep, r, 1); + r->address = addr[0]; + r->address_length = kvp->key[2] & 0xFF; + r->index = kvp->value; + } +} + +typedef struct { + u32 fib_index; + u64 count_by_prefix_length[129]; +} count_routes_in_fib_at_prefix_length_arg_t; + +static void count_routes_in_fib_at_prefix_length +(BVT(clib_bihash_kv) * kvp, void *arg) +{ + count_routes_in_fib_at_prefix_length_arg_t * ap = arg; + int mask_width; + + if ((kvp->key[2]>>32) != ap->fib_index) + return; + + mask_width = kvp->key[2] & 0xFF; + + 
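+ /* The low byte of key[2] holds the prefix length; tally one route at that length for the summary display. */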
ap->count_by_prefix_length[mask_width]++; +} + + +static clib_error_t * +ip6_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im6 = &ip6_main; + ip6_route_t * routes, * r; + ip6_fib_t * fib; + ip_lookup_main_t * lm = &im6->lookup_main; + uword * results; + int verbose; + BVT(clib_bihash) * h = &im6->ip6_lookup_table; + __attribute__((unused)) u8 clear = 0; + add_routes_in_fib_arg_t _a, *a=&_a; + count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca; + + routes = 0; + results = 0; + verbose = 1; + if (unformat (input, "brief") || unformat (input, "summary") + || unformat (input, "sum")) + verbose = 0; + + if (unformat (input, "clear")) + clear = 1; + + vlib_cli_output (vm, "FIB lookup table: %d buckets, %lld MB heap", + im6->lookup_table_nbuckets, im6->lookup_table_size>>20); + vlib_cli_output (vm, "%U", format_mheap, h->mheap, 0 /*verbose*/); + vlib_cli_output (vm, " "); + + vec_foreach (fib, im6->fibs) + { + vlib_cli_output (vm, "VRF %d, fib_index %d, flow hash: %U", + fib->table_id, fib - im6->fibs, + format_ip_flow_hash_config, fib->flow_hash_config); + + /* Show summary? */ + if (! verbose) + { + int len; + vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); + + memset (ca, 0, sizeof(*ca)); + ca->fib_index = fib - im6->fibs; + + BV(clib_bihash_foreach_key_value_pair) + (h, count_routes_in_fib_at_prefix_length, ca); + + for (len = 128; len >= 0; len--) + { + if (ca->count_by_prefix_length[len]) + vlib_cli_output (vm, "%=20d%=16lld", + len, ca->count_by_prefix_length[len]); + } + continue; + } + + if (routes) + _vec_len (routes) = 0; + if (results) + _vec_len (results) = 0; + + a->fib_index = fib - im6->fibs; + a->routep = &routes; + + BV(clib_bihash_foreach_key_value_pair)(h, add_routes_in_fib, a); + + vec_sort (routes, r1, r2, + ({ int cmp = ip6_address_compare (&r1->address, &r2->address); + cmp ? 
cmp : ((int) r1->address_length - (int) r2->address_length); })); + + vlib_cli_output (vm, "%=45s%=16s%=16s%=16s", + "Destination", "Packets", "Bytes", "Adjacency"); + vec_foreach (r, routes) + { + vlib_counter_t c, sum; + uword i, j, n_left, n_nhs, adj_index, * result = 0; + ip_adjacency_t * adj; + ip_multipath_next_hop_t * nhs, tmp_nhs[1]; + + adj_index = r->index; + if (lm->fib_result_n_words > 1) + { + result = vec_elt_at_index (results, adj_index); + adj_index = result[0]; + } + + adj = ip_get_adjacency (lm, adj_index); + if (adj->n_adj == 1) + { + nhs = &tmp_nhs[0]; + nhs[0].next_hop_adj_index = ~0; /* not used */ + nhs[0].weight = 1; + n_nhs = 1; + } + else + { + ip_multipath_adjacency_t * madj; + madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle); + nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset); + n_nhs = madj->normalized_next_hops.count; + } + + n_left = nhs[0].weight; + vlib_counter_zero (&sum); + for (i = j = 0; i < adj->n_adj; i++) + { + n_left -= 1; + vlib_get_combined_counter (&lm->adjacency_counters, + adj_index + i, &c); + if (clear) + vlib_zero_combined_counter (&lm->adjacency_counters, + adj_index + i); + vlib_counter_add (&sum, &c); + if (n_left == 0) + { + u8 * msg = 0; + uword indent; + + if (j == 0) + msg = format (msg, "%-45U", + format_ip6_address_and_length, + r->address.as_u8, r->address_length); + else + msg = format (msg, "%U", format_white_space, 20); + + msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes); + + indent = vec_len (msg); + msg = format (msg, "weight %d, index %d\n%U%U", + nhs[j].weight, adj_index + i, + format_white_space, indent, + format_ip_adjacency, + vnm, lm, adj_index + i); + + vlib_cli_output (vm, "%v", msg); + vec_free (msg); + + j++; + if (j < n_nhs) + { + n_left = nhs[j].weight; + vlib_counter_zero (&sum); + } + } + } + + if (result && lm->format_fib_result) + vlib_cli_output (vm, "%20s%U", "", lm->format_fib_result, vm, lm, result, 0); + } + vlib_cli_output (vm, " "); + } + + vec_free (routes); + vec_free (results); + + return 0; +} + +VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { + .path = "show ip6 fib", + .short_help = "show ip6 fib [summary] [clear]", + .function = ip6_show_fib, +}; diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h new file mode 100644 index 00000000000..e4e5acfece3 --- /dev/null +++ b/vnet/vnet/ip/lookup.h @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ... 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_lookup_h +#define included_ip_lookup_h + +#include <vnet/vnet.h> +#include <vlib/buffer.h> + +/* Next index stored in adjacency. */ +typedef enum { + /* Packet does not match any route in table. */ + IP_LOOKUP_NEXT_MISS, + + /* Adjacency says to drop or punt this packet. */ + IP_LOOKUP_NEXT_DROP, + IP_LOOKUP_NEXT_PUNT, + + /* This packet is for one of our own IP addresses. */ + IP_LOOKUP_NEXT_LOCAL, + + /* This packet matches an "interface route" and packets + need to be passed to ARP to find rewrite string for + this destination. */ + IP_LOOKUP_NEXT_ARP, + + /* This packet is to be rewritten and forwarded to the next + processing node. This is typically the output interface but + might be another node for further output processing. */ + IP_LOOKUP_NEXT_REWRITE, + + /* This packet needs to be classified */ + IP_LOOKUP_NEXT_CLASSIFY, + + /* This packet needs to go to MAP - RFC7596, RFC7597 */ + IP_LOOKUP_NEXT_MAP, + + /* This packet needs to go to MAP with Translation - RFC7599 */ + IP_LOOKUP_NEXT_MAP_T, + + /* This packets needs to go to 6RD (RFC5969) */ + IP_LOOKUP_NEXT_SIXRD, + + /* Hop-by-hop header handling */ + IP_LOOKUP_NEXT_HOP_BY_HOP, + IP_LOOKUP_NEXT_ADD_HOP_BY_HOP, + IP_LOOKUP_NEXT_POP_HOP_BY_HOP, + + IP_LOOKUP_N_NEXT, +} ip_lookup_next_t; + +/* Flow hash configuration */ +#define IP_FLOW_HASH_SRC_ADDR (1<<0) +#define IP_FLOW_HASH_DST_ADDR (1<<1) +#define IP_FLOW_HASH_PROTO (1<<2) +#define IP_FLOW_HASH_SRC_PORT (1<<3) +#define IP_FLOW_HASH_DST_PORT (1<<4) +#define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5) + +/* Default: 5-tuple without the "reverse" bit */ +#define IP_FLOW_HASH_DEFAULT (0x1F) + +#define foreach_flow_hash_bit \ +_(src, IP_FLOW_HASH_SRC_ADDR) \ +_(dst, IP_FLOW_HASH_DST_ADDR) \ +_(sport, IP_FLOW_HASH_SRC_PORT) \ +_(dport, IP_FLOW_HASH_DST_PORT) \ +_(proto, IP_FLOW_HASH_PROTO) \ +_(reverse, IP_FLOW_HASH_REVERSE_SRC_DST) + +/* IP unicast adjacency. */ +typedef struct { + /* Handle for this adjacency in adjacency heap. */ + u32 heap_handle; + + /* Interface address index for this local/arp adjacency. */ + u32 if_address_index; + + /* Number of adjecencies in block. Greater than 1 means multipath; + otherwise equal to 1. */ + u16 n_adj; + + /* Next hop after ip4-lookup. */ + union { + ip_lookup_next_t lookup_next_index : 16; + u16 lookup_next_index_as_int; + }; + + /* Force re-lookup in a different FIB. 
~0 => normal behavior */ + i16 explicit_fib_index; + u16 mcast_group_index; + + /* When classifying, start here */ + u16 classify_table_index; + /* Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */ + u16 saved_lookup_next_index; + + vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE - 5*sizeof(u32)); +} ip_adjacency_t; + +/* Index into adjacency table. */ +typedef u32 ip_adjacency_index_t; + +typedef struct { + /* Directly connected next-hop adjacency index. */ + u32 next_hop_adj_index; + + /* Path weight for this adjacency. */ + u32 weight; +} ip_multipath_next_hop_t; + +typedef struct { + /* Adjacency index of first index in block. */ + u32 adj_index; + + /* Power of 2 size of adjacency block. */ + u32 n_adj_in_block; + + /* Number of prefixes that point to this adjacency. */ + u32 reference_count; + + /* Normalized next hops are used as hash keys: they are sorted by weight + and weights are chosen so they add up to 1 << log2_n_adj_in_block (with + zero-weighted next hops being deleted). + Unnormalized next hops are saved so that control plane has a record of exactly + what the RIB told it. */ + struct { + /* Number of hops in the multipath. */ + u32 count; + + /* Offset into next hop heap for this block. */ + u32 heap_offset; + + /* Heap handle used to for example free block when we're done with it. */ + u32 heap_handle; + } normalized_next_hops, unnormalized_next_hops; +} ip_multipath_adjacency_t; + +/* IP multicast adjacency. */ +typedef struct { + /* Handle for this adjacency in adjacency heap. */ + u32 heap_handle; + + /* Number of adjecencies in block. */ + u32 n_adj; + + /* Rewrite string. */ + vnet_declare_rewrite (64 - 2*sizeof(u32)); +} ip_multicast_rewrite_t; + +typedef struct { + /* ip4-multicast-rewrite next index. */ + u32 next_index; + + u8 n_rewrite_bytes; + + u8 rewrite_string[64 - 1*sizeof(u32) - 1*sizeof(u8)]; +} ip_multicast_rewrite_string_t; + +typedef struct { + ip_multicast_rewrite_t * rewrite_heap; + + ip_multicast_rewrite_string_t * rewrite_strings; + + /* Negative rewrite string index; >= 0 sw_if_index. + Sorted. Used to hash. */ + i32 ** adjacency_id_vector; + + uword * adjacency_by_id_vector; +} ip_multicast_lookup_main_t; + +typedef struct { + /* Key for mhash; in fact, just a byte offset into mhash key vector. */ + u32 address_key; + + /* Interface which has this address. */ + u32 sw_if_index; + + /* Adjacency for neighbor probe (ARP) for this interface address. */ + u32 neighbor_probe_adj_index; + + /* Address (prefix) length for this interface. */ + u16 address_length; + + /* Will be used for something eventually. Primary vs. secondary? */ + u16 flags; + + /* Next and previous pointers for doubly linked list of + addresses per software interface. */ + u32 next_this_sw_interface; + u32 prev_this_sw_interface; +} ip_interface_address_t; + +typedef enum { + IP_LOCAL_NEXT_DROP, + IP_LOCAL_NEXT_PUNT, + // IP_LOCAL_NEXT_TCP_LOOKUP, + IP_LOCAL_NEXT_UDP_LOOKUP, + IP_LOCAL_NEXT_ICMP, + IP_LOCAL_N_NEXT, +} ip_local_next_t; + +struct ip_lookup_main_t; + +typedef void (* ip_add_del_adjacency_callback_t) (struct ip_lookup_main_t * lm, + u32 adj_index, + ip_adjacency_t * adj, + u32 is_del); + +typedef struct { + vnet_config_main_t config_main; + + u32 * config_index_by_sw_if_index; +} ip_config_main_t; + +typedef struct ip_lookup_main_t { + /* Adjacency heap. */ + ip_adjacency_t * adjacency_heap; + + /* Adjacency packet/byte counters indexed by adjacency index. 
*/ + vlib_combined_counter_main_t adjacency_counters; + + /* Heap of (next hop, weight) blocks. Sorted by next hop. */ + ip_multipath_next_hop_t * next_hop_heap; + + /* Indexed by heap_handle from ip_adjacency_t. */ + ip_multipath_adjacency_t * multipath_adjacencies; + + /* Temporary vectors for looking up next hops in hash. */ + ip_multipath_next_hop_t * next_hop_hash_lookup_key; + ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized; + + /* Hash table mapping normalized next hops and weights + to multipath adjacency index. */ + uword * multipath_adjacency_by_next_hops; + + u32 * adjacency_remap_table; + u32 n_adjacency_remaps; + + /* If average error per adjacency is less than this threshold adjacency block + size is accepted. */ + f64 multipath_next_hop_error_tolerance; + + /* Adjacency index for routing table misses, local punts, and drops. */ + u32 miss_adj_index, drop_adj_index, local_adj_index; + + /* Miss adjacency is always first in adjacency table. */ +#define IP_LOOKUP_MISS_ADJ_INDEX 0 + + ip_add_del_adjacency_callback_t * add_del_adjacency_callbacks; + + /* Pool of addresses that are assigned to interfaces. */ + ip_interface_address_t * if_address_pool; + + /* Hash table mapping address to index in interface address pool. */ + mhash_t address_to_if_address_index; + + /* Head of doubly linked list of interface addresses for each software interface. + ~0 means this interface has no address. */ + u32 * if_address_pool_index_by_sw_if_index; + + /* First table index to use for this interface, ~0 => none */ + u32 * classify_table_index_by_sw_if_index; + + /* rx/tx interface/feature configuration. */ + ip_config_main_t rx_config_mains[VNET_N_CAST], tx_config_main; + + /* Number of bytes in a fib result. Must be at least + sizeof (uword). First word is always adjacency index. */ + u32 fib_result_n_bytes, fib_result_n_words; + + format_function_t * format_fib_result; + + /* 1 for ip6; 0 for ip4. */ + u32 is_ip6; + + /* Either format_ip4_address_and_length or format_ip6_address_and_length. */ + format_function_t * format_address_and_length; + + /* Table mapping ip protocol to ip[46]-local node next index. */ + u8 local_next_by_ip_protocol[256]; + + /* IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */ + u8 builtin_protocol_by_ip_protocol[256]; +} ip_lookup_main_t; + +always_inline ip_adjacency_t * +ip_get_adjacency (ip_lookup_main_t * lm, + u32 adj_index) +{ + ip_adjacency_t * adj; + + adj = heap_elt_at_index (lm->adjacency_heap, adj_index); + + ASSERT (! heap_is_free_handle (lm->adjacency_heap, adj->heap_handle)); + + return adj; +} + +#define ip_prefetch_adjacency(lm,adj_index,type) \ +do { \ + ip_adjacency_t * _adj = (lm)->adjacency_heap + (adj_index); \ + CLIB_PREFETCH (_adj, sizeof (_adj[0]), type); \ +} while (0) + +always_inline void +ip_call_add_del_adjacency_callbacks (ip_lookup_main_t * lm, u32 adj_index, u32 is_del) +{ + ip_adjacency_t * adj; + uword i; + adj = ip_get_adjacency (lm, adj_index); + for (i = 0; i < vec_len (lm->add_del_adjacency_callbacks); i++) + lm->add_del_adjacency_callbacks[i] (lm, adj_index, adj, is_del); +} + +/* Create new block of given number of contiguous adjacencies. 
*/ +ip_adjacency_t * +ip_add_adjacency (ip_lookup_main_t * lm, + ip_adjacency_t * adj, + u32 n_adj, + u32 * adj_index_result); + +void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index); + +void +ip_multipath_adjacency_free (ip_lookup_main_t * lm, + ip_multipath_adjacency_t * a); + +u32 +ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm, + u32 is_del, + u32 old_mp_adj_index, + u32 next_hop_adj_index, + u32 next_hop_weight, + u32 * new_mp_adj_index); + +clib_error_t * +ip_interface_address_add_del (ip_lookup_main_t * lm, + u32 sw_if_index, + void * address, + u32 address_length, + u32 is_del, + u32 * result_index); + +always_inline ip_interface_address_t * +ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib) +{ + uword * p = mhash_get (&lm->address_to_if_address_index, addr_fib); + return p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0; +} + +always_inline void * +ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a) +{ return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); } + +always_inline ip_interface_address_t * +ip_interface_address_for_packet (ip_lookup_main_t * lm, vlib_buffer_t * b, u32 sw_if_index) +{ + ip_adjacency_t * adj; + u32 if_address_index; + + adj = ip_get_adjacency (lm, vnet_buffer (b)->ip.adj_index[VLIB_TX]); + + ASSERT (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP + || adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL); + if_address_index = adj->if_address_index; + if_address_index = (if_address_index == ~0 ? + vec_elt (lm->if_address_pool_index_by_sw_if_index, sw_if_index) + : if_address_index); + + return pool_elt_at_index (lm->if_address_pool, if_address_index); +} + +#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \ +do { \ + vnet_main_t *_vnm = vnet_get_main(); \ + u32 _sw_if_index = sw_if_index; \ + vnet_sw_interface_t *_swif; \ + _swif = vnet_get_sw_interface (_vnm, _sw_if_index); \ + \ + /* \ + * Loop => honor unnumbered interface addressing. \ + */ \ + if (loop && _swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) \ + _sw_if_index = _swif->unnumbered_sw_if_index; \ + u32 _ia = \ + (vec_len((lm)->if_address_pool_index_by_sw_if_index) \ + > (_sw_if_index)) \ + ? 
vec_elt ((lm)->if_address_pool_index_by_sw_if_index, \ + (_sw_if_index)) : (u32)~0; \ + ip_interface_address_t * _a; \ + while (_ia != ~0) \ + { \ + _a = pool_elt_at_index ((lm)->if_address_pool, _ia); \ + _ia = _a->next_this_sw_interface; \ + (a) = _a; \ + body; \ + } \ +} while (0) + +void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index); + +serialize_function_t serialize_ip_lookup_main, unserialize_ip_lookup_main; +serialize_function_t serialize_vec_ip_adjacency, unserialize_vec_ip_adjacency; + +#endif /* included_ip_lookup_h */ diff --git a/vnet/vnet/ip/ports.def b/vnet/vnet/ip/ports.def new file mode 100644 index 00000000000..cdb754f5b2e --- /dev/null +++ b/vnet/vnet/ip/ports.def @@ -0,0 +1,757 @@ +/* + * ip/ports.def: tcp/udp port definitions + * + * Eliot Dresselhaus + * August, 2005 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* +PORT NUMBERS + +(last updated 18 October 2005) + +The port numbers are divided into three ranges: the Well Known Ports, +the Registered Ports, and the Dynamic and/or Private Ports. + +The Well Known Ports are those from 0 through 1023. + +The Registered Ports are those from 1024 through 49151 + +The Dynamic and/or Private Ports are those from 49152 through 65535 + + +************************************************************************ +* PLEASE NOTE THE FOLLOWING: * +* * +* 1. UNASSIGNED PORT NUMBERS SHOULD NOT BE USED. THE IANA WILL ASSIGN * +* THE NUMBER FOR THE PORT AFTER YOUR APPLICATION HAS BEEN APPROVED. * +* * +* 2. ASSIGNMENT OF A PORT NUMBER DOES NOT IN ANY WAY IMPLY AN * +* ENDORSEMENT OF AN APPLICATION OR PRODUCT, AND THE FACT THAT NETWORK * +* TRAFFIC IS FLOWING TO OR FROM A REGISTERED PORT DOES NOT MEAN THAT * +* IT IS "GOOD" TRAFFIC. FIREWALL AND SYSTEM ADMINISTRATORS SHOULD * +* CHOOSE HOW TO CONFIGURE THEIR SYSTEMS BASED ON THEIR KNOWLEDGE OF * +* THE TRAFFIC IN QUESTION, NOT WHETHER THERE IS A PORT NUMBER * +* REGISTERED OR NOT. * +************************************************************************ + + +WELL KNOWN PORT NUMBERS + +The Well Known Ports are assigned by the IANA and on most systems can +only be used by system (or root) processes or by programs executed by +privileged users. + +Ports are used in the TCP [RFC793] to name the ends of logical +connections which carry long term conversations. For the purpose of +providing services to unknown callers, a service contact port is +defined. This list specifies the port used by the server process as +its contact port. 
The contact port is sometimes called the +"well-known port". + +To the extent possible, these same port assignments are used with the +UDP [RFC768]. + +The range for assigned ports managed by the IANA is 0-1023. +*/ +ip_port (TCPMUX, 1) +ip_port (COMPRESS_NET_MANAGEMENT, 2) +ip_port (COMPRESS_NET, 3) +ip_port (RJE, 5) +ip_port (ECHO, 7) +ip_port (DISCARD, 9) +ip_port (SYSTAT, 11) +ip_port (DAYTIME, 13) +ip_port (QOTD, 17) +ip_port (MSP, 18) +ip_port (CHARGEN, 19) +ip_port (FTP_DATA, 20) +ip_port (FTP, 21) +ip_port (SSH, 22) +ip_port (TELNET, 23) +ip_port (SMTP, 25) +ip_port (NSW_FE, 27) +ip_port (MSG_ICP, 29) +ip_port (MSG_AUTH, 31) +ip_port (DSP, 33) +ip_port (TIME, 37) +ip_port (RAP, 38) +ip_port (RLP, 39) +ip_port (GRAPHICS, 41) +ip_port (NAME, 42) +ip_port (NAMESERVER, 42) +ip_port (NICNAME, 43) +ip_port (MPM_FLAGS, 44) +ip_port (MPM, 45) +ip_port (MPM_SND, 46) +ip_port (NI_FTP, 47) +ip_port (AUDITD, 48) +ip_port (TACACS, 49) +ip_port (RE_MAIL_CK, 50) +ip_port (LA_MAINT, 51) +ip_port (XNS_TIME, 52) +ip_port (DNS, 53) +ip_port (XNS_CH, 54) +ip_port (ISI_GL, 55) +ip_port (XNS_AUTH, 56) +ip_port (XNS_MAIL, 58) +ip_port (NI_MAIL, 61) +ip_port (ACAS, 62) +ip_port (WHOIS_PLUS_PLUS, 63) +ip_port (COVIA, 64) +ip_port (TACACS_DS, 65) +ip_port (ORACLE_SQL_NET, 66) +ip_port (BOOTPS, 67) +ip_port (BOOTPC, 68) +ip_port (TFTP, 69) +ip_port (GOPHER, 70) +ip_port (NETRJS_1, 71) +ip_port (NETRJS_2, 72) +ip_port (NETRJS_3, 73) +ip_port (NETRJS_4, 74) +ip_port (DEOS, 76) +ip_port (VETTCP, 78) +ip_port (FINGER, 79) +ip_port (WWW, 80) +ip_port (HOSTS2_NS, 81) +ip_port (XFER, 82) +ip_port (MIT_ML_DEV, 83) +ip_port (CTF, 84) +ip_port (MIT_ML_DEV1, 85) +ip_port (MFCOBOL, 86) +ip_port (KERBEROS, 88) +ip_port (SU_MIT_TG, 89) +ip_port (DNSIX, 90) +ip_port (MIT_DOV, 91) +ip_port (NPP, 92) +ip_port (DCP, 93) +ip_port (OBJCALL, 94) +ip_port (SUPDUP, 95) +ip_port (DIXIE, 96) +ip_port (SWIFT_RVF, 97) +ip_port (TACNEWS, 98) +ip_port (METAGRAM, 99) +ip_port (NEWACCT, 100) +ip_port (HOSTNAME, 101) +ip_port (ISO_TSAP, 102) +ip_port (GPPITNP, 103) +ip_port (ACR_NEMA, 104) +ip_port (CSO, 105) +ip_port (CSNET_NS, 105) +ip_port (3COM_TSMUX, 106) +ip_port (RTELNET, 107) +ip_port (SNAGAS, 108) +ip_port (POP2, 109) +ip_port (POP3, 110) +ip_port (SUNRPC, 111) +ip_port (MCIDAS, 112) +ip_port (IDENT, 113) +ip_port (SFTP, 115) +ip_port (ANSANOTIFY, 116) +ip_port (UUCP_PATH, 117) +ip_port (SQLSERV, 118) +ip_port (NNTP, 119) +ip_port (CFDPTKT, 120) +ip_port (ERPC, 121) +ip_port (SMAKYNET, 122) +ip_port (NTP, 123) +ip_port (ANSATRADER, 124) +ip_port (LOCUS_MAP, 125) +ip_port (NXEDIT, 126) +ip_port (LOCUS_CON, 127) +ip_port (GSS_XLICEN, 128) +ip_port (PWDGEN, 129) +ip_port (CISCO_FNA, 130) +ip_port (CISCO_TNA, 131) +ip_port (CISCO_SYS, 132) +ip_port (STATSRV, 133) +ip_port (INGRES_NET, 134) +ip_port (EPMAP, 135) +ip_port (PROFILE, 136) +ip_port (NETBIOS_NS, 137) +ip_port (NETBIOS_DGM, 138) +ip_port (NETBIOS_SSN, 139) +ip_port (EMFIS_DATA, 140) +ip_port (EMFIS_CNTL, 141) +ip_port (BL_IDM, 142) +ip_port (IMAP, 143) +ip_port (UMA, 144) +ip_port (UAAC, 145) +ip_port (ISO_TP0, 146) +ip_port (ISO_IP, 147) +ip_port (JARGON, 148) +ip_port (AED_512, 149) +ip_port (SQL_NET, 150) +ip_port (HEMS, 151) +ip_port (BFTP, 152) +ip_port (SGMP, 153) +ip_port (NETSC_PROD, 154) +ip_port (NETSC_DEV, 155) +ip_port (SQLSRV, 156) +ip_port (KNET_CMP, 157) +ip_port (PCMAIL_SRV, 158) +ip_port (NSS_ROUTING, 159) +ip_port (SGMP_TRAPS, 160) +ip_port (SNMP, 161) +ip_port (SNMPTRAP, 162) +ip_port (CMIP_MAN, 163) +ip_port (CMIP_AGENT, 164) +ip_port (XNS_COURIER, 165) 
+ip_port (S_NET, 166) +ip_port (NAMP, 167) +ip_port (RSVD, 168) +ip_port (SEND, 169) +ip_port (PRINT_SRV, 170) +ip_port (MULTIPLEX, 171) +ip_port (CL1, 172) +ip_port (XYPLEX_MUX, 173) +ip_port (MAILQ, 174) +ip_port (VMNET, 175) +ip_port (GENRAD_MUX, 176) +ip_port (XDMCP, 177) +ip_port (NEXTSTEP, 178) +ip_port (BGP, 179) +ip_port (RIS, 180) +ip_port (UNIFY, 181) +ip_port (AUDIT, 182) +ip_port (OCBINDER, 183) +ip_port (OCSERVER, 184) +ip_port (REMOTE_KIS, 185) +ip_port (KIS, 186) +ip_port (ACI, 187) +ip_port (MUMPS, 188) +ip_port (QFT, 189) +ip_port (GACP, 190) +ip_port (PROSPERO, 191) +ip_port (OSU_NMS, 192) +ip_port (SRMP, 193) +ip_port (IRC, 194) +ip_port (DN6_NLM_AUD, 195) +ip_port (DN6_SMM_RED, 196) +ip_port (DLS, 197) +ip_port (DLS_MON, 198) +ip_port (SMUX, 199) +ip_port (SRC, 200) +ip_port (AT_RTMP, 201) +ip_port (AT_NBP, 202) +ip_port (AT_3, 203) +ip_port (AT_ECHO, 204) +ip_port (AT_5, 205) +ip_port (AT_ZIS, 206) +ip_port (AT_7, 207) +ip_port (AT_8, 208) +ip_port (QMTP, 209) +ip_port (Z39_50, 210) +ip_port (TI914CG, 211) +ip_port (ANET, 212) +ip_port (IPX, 213) +ip_port (VMPWSCS, 214) +ip_port (SOFTPC, 215) +ip_port (CAILIC, 216) +ip_port (DBASE, 217) +ip_port (MPP, 218) +ip_port (UARPS, 219) +ip_port (IMAP3, 220) +ip_port (FLN_SPX, 221) +ip_port (RSH_SPX, 222) +ip_port (CDC, 223) +ip_port (MASQDIALER, 224) +ip_port (DIRECT, 242) +ip_port (SUR_MEAS, 243) +ip_port (INBUSINESS, 244) +ip_port (LINK, 245) +ip_port (DSP3270, 246) +ip_port (SUBNTBCST_TFTP, 247) +ip_port (BHFHS, 248) +ip_port (RAP1, 256) +ip_port (SET, 257) +ip_port (YAK_CHAT, 258) +ip_port (ESRO_GEN, 259) +ip_port (OPENPORT, 260) +ip_port (NSIIOPS, 261) +ip_port (ARCISDMS, 262) +ip_port (HDAP, 263) +ip_port (BGMP, 264) +ip_port (X_BONE_CTL, 265) +ip_port (SST, 266) +ip_port (TD_SERVICE, 267) +ip_port (TD_REPLICA, 268) +ip_port (HTTP_MGMT, 280) +ip_port (PERSONAL_LINK, 281) +ip_port (CABLEPORT_AX, 282) +ip_port (RESCAP, 283) +ip_port (CORERJD, 284) +ip_port (FXP, 286) +ip_port (K_BLOCK, 287) +ip_port (NOVASTORBAKCUP, 308) +ip_port (ENTRUSTTIME, 309) +ip_port (BHMDS, 310) +ip_port (ASIP_WEBADMIN, 311) +ip_port (VSLMP, 312) +ip_port (MAGENTA_LOGIC, 313) +ip_port (OPALIS_ROBOT, 314) +ip_port (DPSI, 315) +ip_port (DECAUTH, 316) +ip_port (ZANNET, 317) +ip_port (PKIX_TIMESTAMP, 318) +ip_port (PTP_EVENT, 319) +ip_port (PTP_GENERAL, 320) +ip_port (PIP, 321) +ip_port (RTSPS, 322) +ip_port (TEXAR, 333) +ip_port (PDAP, 344) +ip_port (PAWSERV, 345) +ip_port (ZSERV, 346) +ip_port (FATSERV, 347) +ip_port (CSI_SGWP, 348) +ip_port (MFTP, 349) +ip_port (MATIP_TYPE_A, 350) +ip_port (MATIP_TYPE_B, 351) +ip_port (BHOETTY, 351) +ip_port (DTAG_STE_SB, 352) +ip_port (BHOEDAP4, 352) +ip_port (NDSAUTH, 353) +ip_port (BH611, 354) +ip_port (DATEX_ASN, 355) +ip_port (CLOANTO_NET_1, 356) +ip_port (BHEVENT, 357) +ip_port (SHRINKWRAP, 358) +ip_port (NSRMP, 359) +ip_port (SCOI2ODIALOG, 360) +ip_port (SEMANTIX, 361) +ip_port (SRSSEND, 362) +ip_port (RSVP_TUNNEL, 363) +ip_port (AURORA_CMGR, 364) +ip_port (DTK, 365) +ip_port (ODMR, 366) +ip_port (MORTGAGEWARE, 367) +ip_port (QBIKGDP, 368) +ip_port (RPC2PORTMAP, 369) +ip_port (CODAAUTH2, 370) +ip_port (CLEARCASE, 371) +ip_port (ULISTPROC, 372) +ip_port (LEGENT_1, 373) +ip_port (LEGENT_2, 374) +ip_port (HASSLE, 375) +ip_port (NIP, 376) +ip_port (TNETOS, 377) +ip_port (DSETOS, 378) +ip_port (IS99C, 379) +ip_port (IS99S, 380) +ip_port (HP_COLLECTOR, 381) +ip_port (HP_MANAGED_NODE, 382) +ip_port (HP_ALARM_MGR, 383) +ip_port (ARNS, 384) +ip_port (IBM_APP, 385) +ip_port (ASA, 386) +ip_port (AURP, 387) +ip_port 
(UNIDATA_LDM, 388) +ip_port (LDAP, 389) +ip_port (UIS, 390) +ip_port (SYNOTICS_RELAY, 391) +ip_port (SYNOTICS_BROKER, 392) +ip_port (META5, 393) +ip_port (EMBL_NDT, 394) +ip_port (NETCP, 395) +ip_port (NETWARE_IP, 396) +ip_port (MPTN, 397) +ip_port (KRYPTOLAN, 398) +ip_port (ISO_TSAP_C2, 399) +ip_port (WORK_SOL, 400) +ip_port (UPS, 401) +ip_port (GENIE, 402) +ip_port (DECAP, 403) +ip_port (NCED, 404) +ip_port (NCLD, 405) +ip_port (IMSP, 406) +ip_port (TIMBUKTU, 407) +ip_port (PRM_SM, 408) +ip_port (PRM_NM, 409) +ip_port (DECLADEBUG, 410) +ip_port (RMT, 411) +ip_port (SYNOPTICS_TRAP, 412) +ip_port (SMSP, 413) +ip_port (INFOSEEK, 414) +ip_port (BNET, 415) +ip_port (SILVERPLATTER, 416) +ip_port (ONMUX, 417) +ip_port (HYPER_G, 418) +ip_port (ARIEL1, 419) +ip_port (SMPTE, 420) +ip_port (ARIEL2, 421) +ip_port (ARIEL3, 422) +ip_port (OPC_JOB_START, 423) +ip_port (OPC_JOB_TRACK, 424) +ip_port (ICAD_EL, 425) +ip_port (SMARTSDP, 426) +ip_port (SVRLOC, 427) +ip_port (OCS_CMU, 428) +ip_port (OCS_AMU, 429) +ip_port (UTMPSD, 430) +ip_port (UTMPCD, 431) +ip_port (IASD, 432) +ip_port (NNSP, 433) +ip_port (MOBILEIP_AGENT, 434) +ip_port (MOBILIP_MN, 435) +ip_port (DNA_CML, 436) +ip_port (COMSCM, 437) +ip_port (DSFGW, 438) +ip_port (DASP, 439) +ip_port (SGCP, 440) +ip_port (DECVMS_SYSMGT, 441) +ip_port (CVC_HOSTD, 442) +ip_port (HTTPS, 443) +ip_port (SNPP, 444) +ip_port (MICROSOFT_DS, 445) +ip_port (DDM_RDB, 446) +ip_port (DDM_DFM, 447) +ip_port (DDM_SSL, 448) +ip_port (AS_SERVERMAP, 449) +ip_port (TSERVER, 450) +ip_port (SFS_SMP_NET, 451) +ip_port (SFS_CONFIG, 452) +ip_port (CREATIVESERVER, 453) +ip_port (CONTENTSERVER, 454) +ip_port (CREATIVEPARTNR, 455) +ip_port (MACON_TCP, 456) +ip_port (SCOHELP, 457) +ip_port (APPLEQTC, 458) +ip_port (AMPR_RCMD, 459) +ip_port (SKRONK, 460) +ip_port (DATASURFSRV, 461) +ip_port (DATASURFSRVSEC, 462) +ip_port (ALPES, 463) +ip_port (KPASSWD, 464) +ip_port (URD, 465) +ip_port (DIGITAL_VRC, 466) +ip_port (MYLEX_MAPD, 467) +ip_port (PHOTURIS, 468) +ip_port (RCP, 469) +ip_port (SCX_PROXY, 470) +ip_port (MONDEX, 471) +ip_port (LJK_LOGIN, 472) +ip_port (HYBRID_POP, 473) +ip_port (TN_TL_W1, 474) +ip_port (TCPNETHASPSRV, 475) +ip_port (TN_TL_FD1, 476) +ip_port (SS7NS, 477) +ip_port (SPSC, 478) +ip_port (IAFSERVER, 479) +ip_port (IAFDBASE, 480) +ip_port (PH, 481) +ip_port (BGS_NSI, 482) +ip_port (ULPNET, 483) +ip_port (INTEGRA_SME, 484) +ip_port (POWERBURST, 485) +ip_port (AVIAN, 486) +ip_port (SAFT, 487) +ip_port (GSS_HTTP, 488) +ip_port (NEST_PROTOCOL, 489) +ip_port (MICOM_PFS, 490) +ip_port (GO_LOGIN, 491) +ip_port (TICF_1, 492) +ip_port (TICF_2, 493) +ip_port (POV_RAY, 494) +ip_port (INTECOURIER, 495) +ip_port (PIM_RP_DISC, 496) +ip_port (DANTZ, 497) +ip_port (SIAM, 498) +ip_port (ISO_ILL, 499) +ip_port (ISAKMP, 500) +ip_port (STMF, 501) +ip_port (ASA_APPL_PROTO, 502) +ip_port (INTRINSA, 503) +ip_port (CITADEL, 504) +ip_port (MAILBOX_LM, 505) +ip_port (OHIMSRV, 506) +ip_port (CRS, 507) +ip_port (XVTTP, 508) +ip_port (SNARE, 509) +ip_port (FCP, 510) +ip_port (PASSGO, 511) +ip_port (EXEC, 512) +ip_port (LOGIN, 513) +ip_port (SHELL, 514) +ip_port (PRINTER, 515) +ip_port (VIDEOTEX, 516) +ip_port (TALK, 517) +ip_port (NTALK, 518) +ip_port (UTIME, 519) +ip_port (EFS, 520) +ip_port (RIPNG, 521) +ip_port (ULP, 522) +ip_port (IBM_DB2, 523) +ip_port (NCP, 524) +ip_port (TIMED, 525) +ip_port (TEMPO, 526) +ip_port (STX, 527) +ip_port (CUSTIX, 528) +ip_port (IRC_SERV, 529) +ip_port (COURIER, 530) +ip_port (CONFERENCE, 531) +ip_port (NETNEWS, 532) +ip_port (NETWALL, 533) +ip_port (MM_ADMIN, 
534) +ip_port (IIOP, 535) +ip_port (OPALIS_RDV, 536) +ip_port (NMSP, 537) +ip_port (GDOMAP, 538) +ip_port (APERTUS_LDP, 539) +ip_port (UUCP, 540) +ip_port (UUCP_RLOGIN, 541) +ip_port (COMMERCE, 542) +ip_port (KLOGIN, 543) +ip_port (KSHELL, 544) +ip_port (APPLEQTCSRVR, 545) +ip_port (DHCPV6_CLIENT, 546) +ip_port (DHCPV6_SERVER, 547) +ip_port (AFPOVERTCP, 548) +ip_port (IDFP, 549) +ip_port (NEW_RWHO, 550) +ip_port (CYBERCASH, 551) +ip_port (DEVSHR_NTS, 552) +ip_port (PIRP, 553) +ip_port (RTSP, 554) +ip_port (DSF, 555) +ip_port (REMOTEFS, 556) +ip_port (OPENVMS_SYSIPC, 557) +ip_port (SDNSKMP, 558) +ip_port (TEEDTAP, 559) +ip_port (RMONITOR, 560) +ip_port (MONITOR, 561) +ip_port (CHSHELL, 562) +ip_port (NNTPS, 563) +ip_port (9PFS, 564) +ip_port (WHOAMI, 565) +ip_port (STREETTALK, 566) +ip_port (BANYAN_RPC, 567) +ip_port (MS_SHUTTLE, 568) +ip_port (MS_ROME, 569) +ip_port (METER, 570) +ip_port (METER1, 571) +ip_port (SONAR, 572) +ip_port (BANYAN_VIP, 573) +ip_port (FTP_AGENT, 574) +ip_port (VEMMI, 575) +ip_port (IPCD, 576) +ip_port (VNAS, 577) +ip_port (IPDD, 578) +ip_port (DECBSRV, 579) +ip_port (SNTP_HEARTBEAT, 580) +ip_port (BDP, 581) +ip_port (SCC_SECURITY, 582) +ip_port (PHILIPS_VC, 583) +ip_port (KEYSERVER, 584) +ip_port (IMAP4_SSL, 585) +ip_port (PASSWORD_CHG, 586) +ip_port (SUBMISSION, 587) +ip_port (CAL, 588) +ip_port (EYELINK, 589) +ip_port (TNS_CML, 590) +ip_port (HTTP_ALT, 591) +ip_port (EUDORA_SET, 592) +ip_port (HTTP_RPC_EPMAP, 593) +ip_port (TPIP, 594) +ip_port (CAB_PROTOCOL, 595) +ip_port (SMSD, 596) +ip_port (PTCNAMESERVICE, 597) +ip_port (SCO_WEBSRVRMG3, 598) +ip_port (ACP, 599) +ip_port (IPCSERVER, 600) +ip_port (SYSLOG_CONN, 601) +ip_port (XMLRPC_BEEP, 602) +ip_port (IDXP, 603) +ip_port (TUNNEL, 604) +ip_port (SOAP_BEEP, 605) +ip_port (URM, 606) +ip_port (NQS, 607) +ip_port (SIFT_UFT, 608) +ip_port (NPMP_TRAP, 609) +ip_port (NPMP_LOCAL, 610) +ip_port (NPMP_GUI, 611) +ip_port (HMMP_IND, 612) +ip_port (HMMP_OP, 613) +ip_port (SSHELL, 614) +ip_port (SCO_INETMGR, 615) +ip_port (SCO_SYSMGR, 616) +ip_port (SCO_DTMGR, 617) +ip_port (DEI_ICDA, 618) +ip_port (COMPAQ_EVM, 619) +ip_port (SCO_WEBSRVRMGR, 620) +ip_port (ESCP_IP, 621) +ip_port (COLLABORATOR, 622) +ip_port (ASF_RMCP, 623) +ip_port (CRYPTOADMIN, 624) +ip_port (DEC_DLM, 625) +ip_port (ASIA, 626) +ip_port (PASSGO_TIVOLI, 627) +ip_port (QMQP, 628) +ip_port (3COM_AMP3, 629) +ip_port (RDA, 630) +ip_port (IPP, 631) +ip_port (BMPP, 632) +ip_port (SERVSTAT, 633) +ip_port (GINAD, 634) +ip_port (RLZDBASE, 635) +ip_port (LDAPS, 636) +ip_port (LANSERVER, 637) +ip_port (MCNS_SEC, 638) +ip_port (MSDP, 639) +ip_port (ENTRUST_SPS, 640) +ip_port (REPCMD, 641) +ip_port (ESRO_EMSDP, 642) +ip_port (SANITY, 643) +ip_port (DWR, 644) +ip_port (PSSC, 645) +ip_port (LDP, 646) +ip_port (DHCP_FAILOVER, 647) +ip_port (RRP, 648) +ip_port (CADVIEW_3D, 649) +ip_port (OBEX, 650) +ip_port (IEEE_MMS, 651) +ip_port (HELLO_PORT, 652) +ip_port (REPSCMD, 653) +ip_port (AODV, 654) +ip_port (TINC, 655) +ip_port (SPMP, 656) +ip_port (RMC, 657) +ip_port (TENFOLD, 658) +ip_port (MAC_SRVR_ADMIN, 660) +ip_port (HAP, 661) +ip_port (PFTP, 662) +ip_port (PURENOISE, 663) +ip_port (ASF_SECURE_RMCP, 664) +ip_port (SUN_DR, 665) +ip_port (MDQS, 666) +ip_port (DOOM, 666) +ip_port (DISCLOSE, 667) +ip_port (MECOMM, 668) +ip_port (MEREGISTER, 669) +ip_port (VACDSM_SWS, 670) +ip_port (VACDSM_APP, 671) +ip_port (VPPS_QUA, 672) +ip_port (CIMPLEX, 673) +ip_port (ACAP, 674) +ip_port (DCTP, 675) +ip_port (VPPS_VIA, 676) +ip_port (VPP, 677) +ip_port (GGF_NCP, 678) +ip_port (MRM, 679) 
+ip_port (ENTRUST_AAAS, 680) +ip_port (ENTRUST_AAMS, 681) +ip_port (XFR, 682) +ip_port (CORBA_IIOP, 683) +ip_port (CORBA_IIOP_SSL, 684) +ip_port (MDC_PORTMAPPER, 685) +ip_port (HCP_WISMAR, 686) +ip_port (ASIPREGISTRY, 687) +ip_port (REALM_RUSD, 688) +ip_port (NMAP, 689) +ip_port (VATP, 690) +ip_port (MSEXCH_ROUTING, 691) +ip_port (HYPERWAVE_ISP, 692) +ip_port (CONNENDP, 693) +ip_port (HA_CLUSTER, 694) +ip_port (IEEE_MMS_SSL, 695) +ip_port (RUSHD, 696) +ip_port (UUIDGEN, 697) +ip_port (OLSR, 698) +ip_port (ACCESSNETWORK, 699) +ip_port (EPP, 700) +ip_port (LMP, 701) +ip_port (IRIS_BEEP, 702) +ip_port (ELCSD, 704) +ip_port (AGENTX, 705) +ip_port (SILC, 706) +ip_port (BORLAND_DSJ, 707) +ip_port (ENTRUST_KMSH, 709) +ip_port (ENTRUST_ASH, 710) +ip_port (CISCO_TDP, 711) +ip_port (TBRPF, 712) +ip_port (NETVIEWDM1, 729) +ip_port (NETVIEWDM2, 730) +ip_port (NETVIEWDM3, 731) +ip_port (NETGW, 741) +ip_port (NETRCS, 742) +ip_port (FLEXLM, 744) +ip_port (FUJITSU_DEV, 747) +ip_port (RIS_CM, 748) +ip_port (KERBEROS_ADM, 749) +ip_port (RFILE, 750) +ip_port (PUMP, 751) +ip_port (QRH, 752) +ip_port (RRH, 753) +ip_port (TELL, 754) +ip_port (NLOGIN, 758) +ip_port (CON, 759) +ip_port (NS, 760) +ip_port (RXE, 761) +ip_port (QUOTAD, 762) +ip_port (CYCLESERV, 763) +ip_port (OMSERV, 764) +ip_port (WEBSTER, 765) +ip_port (PHONEBOOK, 767) +ip_port (VID, 769) +ip_port (CADLOCK, 770) +ip_port (RTIP, 771) +ip_port (CYCLESERV2, 772) +ip_port (SUBMIT, 773) +ip_port (RPASSWD, 774) +ip_port (ENTOMB, 775) +ip_port (WPAGES, 776) +ip_port (MULTILING_HTTP, 777) +ip_port (WPGS, 780) +ip_port (MDBS_DAEMON, 800) +ip_port (DEVICE, 801) +ip_port (FCP_UDP, 810) +ip_port (ITM_MCELL_S, 828) +ip_port (PKIX_3_CA_RA, 829) +ip_port (DHCP_FAILOVER2, 847) +ip_port (GDOI, 848) +ip_port (ISCSI, 860) +ip_port (RSYNC, 873) +ip_port (ICLCNET_LOCATE, 886) +ip_port (ICLCNET_SVINFO, 887) +ip_port (ACCESSBUILDER, 888) +ip_port (CDDBP, 888) +ip_port (OMGINITIALREFS, 900) +ip_port (SMPNAMERES, 901) +ip_port (IDEAFARM_CHAT, 902) +ip_port (IDEAFARM_CATCH, 903) +ip_port (XACT_BACKUP, 911) +ip_port (APEX_MESH, 912) +ip_port (APEX_EDGE, 913) +ip_port (FTPS_DATA, 989) +ip_port (FTPS, 990) +ip_port (NAS, 991) +ip_port (TELNETS, 992) +ip_port (IMAPS, 993) +ip_port (IRCS, 994) +ip_port (POP3S, 995) +ip_port (VSINET, 996) +ip_port (MAITRD, 997) +ip_port (BUSBOY, 998) +ip_port (GARCON, 999) +ip_port (PUPROUTER, 999) +ip_port (CADLOCK2, 1000) +ip_port (SURF, 1010) + diff --git a/vnet/vnet/ip/protocols.def b/vnet/vnet/ip/protocols.def new file mode 100644 index 00000000000..77fab31da05 --- /dev/null +++ b/vnet/vnet/ip/protocols.def @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Emacs editing mode -*-C-*- + +From http://www.iana.org/assignments/protocol-numbers + +PROTOCOL NUMBERS + +(last updated 18 October 2004) + +In the Internet Protocol version 4 (IPv4) [RFC791] there is a field, +called "Protocol", to identify the next level protocol. This is an 8 +bit field. 
In Internet Protocol version 6 (IPv6) [RFC1883] this field +is called the "Next Header" field. +*/ +ip_protocol (0, IP6_HOP_BY_HOP_OPTIONS) +ip_protocol (1, ICMP) +ip_protocol (2, IGMP) +ip_protocol (3, GGP) +ip_protocol (4, IP_IN_IP) +ip_protocol (5, ST) +ip_protocol (6, TCP) +ip_protocol (7, CBT) +ip_protocol (8, EGP) +ip_protocol (9, IGP) +ip_protocol (10, BBN_RCC_MON) +ip_protocol (11, NVP_II) +ip_protocol (12, PUP) +ip_protocol (13, ARGUS) +ip_protocol (14, EMCON) +ip_protocol (15, XNET) +ip_protocol (16, CHAOS) +ip_protocol (17, UDP) +ip_protocol (18, MUX) +ip_protocol (19, DCN_MEAS) +ip_protocol (20, HMP) +ip_protocol (21, PRM) +ip_protocol (22, XNS_IDP) +ip_protocol (23, TRUNK_1) +ip_protocol (24, TRUNK_2) +ip_protocol (25, LEAF_1) +ip_protocol (26, LEAF_2) +ip_protocol (27, RDP) +ip_protocol (28, IRTP) +ip_protocol (29, ISO_TP4) +ip_protocol (30, NETBLT) +ip_protocol (31, MFE_NSP) +ip_protocol (32, MERIT_INP) +ip_protocol (33, SEP) +ip_protocol (34, 3PC) +ip_protocol (35, IDPR) +ip_protocol (36, XTP) +ip_protocol (37, DDP) +ip_protocol (38, IDPR_CMTP) +ip_protocol (39, TP) +ip_protocol (40, IL) +ip_protocol (41, IPV6) +ip_protocol (42, SDRP) +ip_protocol (43, IPV6_ROUTE) +ip_protocol (44, IPV6_FRAGMENTATION) +ip_protocol (45, IDRP) +ip_protocol (46, RSVP) +ip_protocol (47, GRE) +ip_protocol (48, MHRP) +ip_protocol (49, BNA) +ip_protocol (50, IPSEC_ESP) +ip_protocol (51, IPSEC_AH) +ip_protocol (52, I_NLSP) +ip_protocol (53, SWIPE) +ip_protocol (54, NARP) +ip_protocol (55, MOBILE) +ip_protocol (56, TLSP) +ip_protocol (57, SKIP) +ip_protocol (58, ICMP6) +ip_protocol (59, IP6_NONXT) +ip_protocol (60, IP6_DESTINATION_OPTIONS) +ip_protocol (62, CFTP) +ip_protocol (64, SAT_EXPAK) +ip_protocol (65, KRYPTOLAN) +ip_protocol (66, RVD) +ip_protocol (67, IPPC) +ip_protocol (69, SAT_MON) +ip_protocol (70, VISA) +ip_protocol (71, IPCV) +ip_protocol (72, CPNX) +ip_protocol (73, CPHB) +ip_protocol (74, WSN) +ip_protocol (75, PVP) +ip_protocol (76, BR_SAT_MON) +ip_protocol (77, SUN_ND) +ip_protocol (78, WB_MON) +ip_protocol (79, WB_EXPAK) +ip_protocol (80, ISO_IP) +ip_protocol (81, VMTP) +ip_protocol (82, SECURE_VMTP) +ip_protocol (83, VINES) +ip_protocol (84, TTP) +ip_protocol (85, NSFNET_IGP) +ip_protocol (86, DGP) +ip_protocol (87, TCF) +ip_protocol (88, EIGRP) +ip_protocol (89, OSPF) +ip_protocol (90, SPRITE_RPC) +ip_protocol (91, LARP) +ip_protocol (92, MTP) +ip_protocol (93, AX) +ip_protocol (94, IPIP) +ip_protocol (95, MICP) +ip_protocol (96, SCC_SP) +ip_protocol (97, ETHERIP) +ip_protocol (98, ENCAP) +ip_protocol (100, GMTP) +ip_protocol (101, IFMP) +ip_protocol (102, PNNI) +ip_protocol (103, PIM) +ip_protocol (104, ARIS) +ip_protocol (105, SCPS) +ip_protocol (106, QNX) +ip_protocol (107, A) +ip_protocol (108, IPCOMP) +ip_protocol (109, SNP) +ip_protocol (110, COMPAQ_PEER) +ip_protocol (111, IPX_IN_IP) +ip_protocol (112, VRRP) +ip_protocol (113, PGM) +ip_protocol (115, L2TP) +ip_protocol (116, DDX) +ip_protocol (117, IATP) +ip_protocol (118, STP) +ip_protocol (119, SRP) +ip_protocol (120, UTI) +ip_protocol (121, SMP) +ip_protocol (122, SM) +ip_protocol (123, PTP) +ip_protocol (124, ISIS) +ip_protocol (125, FIRE) +ip_protocol (126, CRTP) +ip_protocol (127, CRUDP) +ip_protocol (128, SSCOPMCE) +ip_protocol (129, IPLT) +ip_protocol (130, SPS) +ip_protocol (131, PIPE) +ip_protocol (132, SCTP) +ip_protocol (133, FC) +ip_protocol (134, RSVP_E2E_IGNORE) +ip_protocol (135, MOBILITY) +ip_protocol (136, UDP_LITE) +ip_protocol (137, MPLS_IN_IP) +ip_protocol (255, RESERVED) + diff --git 
a/vnet/vnet/ip/tcp.c b/vnet/vnet/ip/tcp.c new file mode 100644 index 00000000000..53f82f1c5b9 --- /dev/null +++ b/vnet/vnet/ip/tcp.c @@ -0,0 +1,2983 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/tcp.c: tcp protocol + * + * Copyright (c) 2011 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/ip/tcp.h> +#include <math.h> + +static u8 my_zero_mask_table[256] = { + [0xf0] = (1 << 1), + [0x0f] = (1 << 0), + [0xff] = (1 << 0) | (1 << 1), +}; + +static_always_inline u32 my_zero_mask (u32 x) +{ + return ((my_zero_mask_table[(x >> 0) & 0xff] << 0) + | (my_zero_mask_table[(x >> 8) & 0xff] << 2)); +} + +static u8 my_first_set_table[256] = { + [0x00] = 4, + [0xf0] = 1, + [0x0f] = 0, + [0xff] = 0, +}; + +static_always_inline u32 my_first_set (u32 zero_mask) +{ + u8 r0 = my_first_set_table[(zero_mask >> 0) & 0xff]; + u8 r1 = 2 + my_first_set_table[(zero_mask >> 8) & 0xff]; + return r0 != 4 ? 
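/* my_first_set maps a 16-bit mask with one nibble per address slot to the
   index (0..3) of the first slot whose nibble is set, or a value >= 4 when
   no nibble is set; callers test the result with '< 4'. */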
r0 : r1; +} + +static_always_inline void +ip4_tcp_udp_address_x4_set_from_headers (ip4_tcp_udp_address_x4_t * a, + ip4_header_t * ip, + tcp_header_t * tcp, + u32 i) +{ + a->src.as_ip4_address[i] = ip->src_address; + a->dst.as_ip4_address[i] = ip->dst_address; + a->ports.as_ports[i].as_u32 = tcp->ports.src_and_dst; +} + +static_always_inline void +ip4_tcp_udp_address_x4_copy_and_invalidate (ip4_tcp_udp_address_x4_t * dst, + ip4_tcp_udp_address_x4_t * src, + u32 dst_i, u32 src_i) +{ +#define _(d,s) d = s; s = 0; + _ (dst->src.as_ip4_address[dst_i].as_u32, src->src.as_ip4_address[src_i].as_u32); + _ (dst->dst.as_ip4_address[dst_i].as_u32, src->dst.as_ip4_address[src_i].as_u32); + _ (dst->ports.as_ports[dst_i].as_u32, src->ports.as_ports[src_i].as_u32); +#undef _ +} + +static_always_inline void +ip4_tcp_udp_address_x4_invalidate (ip4_tcp_udp_address_x4_t * a, u32 i) +{ + a->src.as_ip4_address[i].as_u32 = 0; + a->dst.as_ip4_address[i].as_u32 = 0; + a->ports.as_ports[i].as_u32 = 0; +} + +static_always_inline uword +ip4_tcp_udp_address_x4_is_valid (ip4_tcp_udp_address_x4_t * a, u32 i) +{ + return !(a->src.as_ip4_address[i].as_u32 == 0 + && a->dst.as_ip4_address[i].as_u32 == 0 + && a->ports.as_ports[i].as_u32 == 0); +} + +#ifdef TCP_HAVE_VEC128 +static_always_inline uword +ip4_tcp_udp_address_x4_match_helper (ip4_tcp_udp_address_x4_t * ax4, + u32x4 src, u32x4 dst, u32x4 ports) +{ + u32x4 r; + u32 m; + + r = u32x4_is_equal (src, ax4->src.as_u32x4); + r &= u32x4_is_equal (dst, ax4->dst.as_u32x4); + r &= u32x4_is_equal (ports, ax4->ports.as_u32x4); + + /* At this point r will be either all zeros (if nothing matched) + or have 32 1s in the position that did match. */ + m = u8x16_compare_byte_mask ((u8x16) r); + + return m; +} + +static_always_inline uword +ip4_tcp_udp_address_x4_match (ip4_tcp_udp_address_x4_t * ax4, + ip4_header_t * ip, + tcp_header_t * tcp) +{ + u32x4 src = u32x4_splat (ip->src_address.as_u32); + u32x4 dst = u32x4_splat (ip->dst_address.as_u32); + u32x4 ports = u32x4_splat (tcp->ports.src_and_dst); + return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, src, dst, ports)); +} + +static_always_inline uword +ip4_tcp_udp_address_x4_first_empty (ip4_tcp_udp_address_x4_t * ax4) +{ + u32x4 zero = {0}; + return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, zero, zero, zero)); +} + +static_always_inline uword +ip4_tcp_udp_address_x4_empty_mask (ip4_tcp_udp_address_x4_t * ax4) +{ + u32x4 zero = {0}; + return my_zero_mask (ip4_tcp_udp_address_x4_match_helper (ax4, zero, zero, zero)); +} +#else /* TCP_HAVE_VEC128 */ +static_always_inline uword +ip4_tcp_udp_address_x4_match_helper (ip4_tcp_udp_address_x4_t * ax4, + u32 src, u32 dst, u32 ports) +{ + u32 r0, r1, r2, r3; + +#define _(i) \ + r##i = (src == ax4->src.as_ip4_address[i].as_u32 \ + && dst == ax4->dst.as_ip4_address[i].as_u32 \ + && ports == ax4->ports.as_ports[i].as_u32) + + _ (0); + _ (1); + _ (2); + _ (3); + +#undef _ + + return (((r0 ? 0xf : 0x0) << 0) + | ((r1 ? 0xf : 0x0) << 4) + | ((r2 ? 0xf : 0x0) << 8) + | ((r3 ? 
0xf : 0x0) << 12)); +} + +static_always_inline uword +ip4_tcp_udp_address_x4_match (ip4_tcp_udp_address_x4_t * ax4, + ip4_header_t * ip, + tcp_header_t * tcp) +{ + return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, + ip->src_address.as_u32, + ip->dst_address.as_u32, + tcp->ports.src_and_dst)); +} + +static_always_inline uword +ip4_tcp_udp_address_x4_first_empty (ip4_tcp_udp_address_x4_t * ax4) +{ + return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, 0, 0, 0)); +} + +static_always_inline uword +ip4_tcp_udp_address_x4_empty_mask (ip4_tcp_udp_address_x4_t * ax4) +{ + return my_zero_mask (ip4_tcp_udp_address_x4_match_helper (ax4, 0, 0, 0)); +} +#endif + +static u8 * format_ip4_tcp_udp_address_x4 (u8 * s, va_list * va) +{ + ip4_tcp_udp_address_x4_t * a = va_arg (*va, ip4_tcp_udp_address_x4_t *); + u32 ai = va_arg (*va, u32); + ASSERT (ai < 4); + + s = format (s, "%U:%d -> %U:%d", + format_ip4_address, &a->src.as_ip4_address[ai], + clib_net_to_host_u16 (a->ports.as_ports[ai].src), + format_ip4_address, &a->dst.as_ip4_address[ai], + clib_net_to_host_u16 (a->ports.as_ports[ai].dst)); + + return s; +} + +static_always_inline void +ip6_tcp_udp_address_x4_set_from_headers (ip6_tcp_udp_address_x4_t * a, + ip6_header_t * ip, + tcp_header_t * tcp, + u32 i) +{ + a->src.as_u32[0][i] = ip->src_address.as_u32[0]; + a->src.as_u32[1][i] = ip->src_address.as_u32[1]; + a->src.as_u32[2][i] = ip->src_address.as_u32[2]; + a->src.as_u32[3][i] = ip->src_address.as_u32[3]; + a->dst.as_u32[0][i] = ip->dst_address.as_u32[0]; + a->dst.as_u32[1][i] = ip->dst_address.as_u32[1]; + a->dst.as_u32[2][i] = ip->dst_address.as_u32[2]; + a->dst.as_u32[3][i] = ip->dst_address.as_u32[3]; + a->ports.as_ports[i].as_u32 = tcp->ports.src_and_dst; +} + +static_always_inline void +ip6_tcp_udp_address_x4_copy_and_invalidate (ip6_tcp_udp_address_x4_t * dst, + ip6_tcp_udp_address_x4_t * src, + u32 dst_i, u32 src_i) +{ +#define _(d,s) d = s; s = 0; + _ (dst->src.as_u32[0][dst_i], src->src.as_u32[0][src_i]); + _ (dst->src.as_u32[1][dst_i], src->src.as_u32[1][src_i]); + _ (dst->src.as_u32[2][dst_i], src->src.as_u32[2][src_i]); + _ (dst->src.as_u32[3][dst_i], src->src.as_u32[3][src_i]); + _ (dst->dst.as_u32[0][dst_i], src->dst.as_u32[0][src_i]); + _ (dst->dst.as_u32[1][dst_i], src->dst.as_u32[1][src_i]); + _ (dst->dst.as_u32[2][dst_i], src->dst.as_u32[2][src_i]); + _ (dst->dst.as_u32[3][dst_i], src->dst.as_u32[3][src_i]); + _ (dst->ports.as_ports[dst_i].as_u32, src->ports.as_ports[src_i].as_u32); +#undef _ +} + +static_always_inline void +ip6_tcp_udp_address_x4_invalidate (ip6_tcp_udp_address_x4_t * a, u32 i) +{ + a->src.as_u32[0][i] = 0; + a->src.as_u32[1][i] = 0; + a->src.as_u32[2][i] = 0; + a->src.as_u32[3][i] = 0; + a->dst.as_u32[0][i] = 0; + a->dst.as_u32[1][i] = 0; + a->dst.as_u32[2][i] = 0; + a->dst.as_u32[3][i] = 0; + a->ports.as_ports[i].as_u32 = 0; +} + +static_always_inline uword +ip6_tcp_udp_address_x4_is_valid (ip6_tcp_udp_address_x4_t * a, u32 i) +{ + return !(a->src.as_u32[0][i] == 0 + && a->src.as_u32[1][i] == 0 + && a->src.as_u32[2][i] == 0 + && a->src.as_u32[3][i] == 0 + && a->dst.as_u32[0][i] == 0 + && a->dst.as_u32[1][i] == 0 + && a->dst.as_u32[2][i] == 0 + && a->dst.as_u32[3][i] == 0 + && a->ports.as_ports[i].as_u32 == 0); +} + +#ifdef TCP_HAVE_VEC128 +static_always_inline uword +ip6_tcp_udp_address_x4_match_helper (ip6_tcp_udp_address_x4_t * ax4, + u32x4 src0, u32x4 src1, u32x4 src2, u32x4 src3, + u32x4 dst0, u32x4 dst1, u32x4 dst2, u32x4 dst3, + u32x4 ports) +{ + u32x4 r; + u32 m; + + r = 
u32x4_is_equal (src0, ax4->src.as_u32x4[0]); + r &= u32x4_is_equal (src1, ax4->src.as_u32x4[1]); + r &= u32x4_is_equal (src2, ax4->src.as_u32x4[2]); + r &= u32x4_is_equal (src3, ax4->src.as_u32x4[3]); + r &= u32x4_is_equal (dst0, ax4->dst.as_u32x4[0]); + r &= u32x4_is_equal (dst1, ax4->dst.as_u32x4[1]); + r &= u32x4_is_equal (dst2, ax4->dst.as_u32x4[2]); + r &= u32x4_is_equal (dst3, ax4->dst.as_u32x4[3]); + r &= u32x4_is_equal (ports, ax4->ports.as_u32x4); + + /* At this point r will be either all zeros (if nothing matched) + or have 32 1s in the position that did match. */ + m = u8x16_compare_byte_mask ((u8x16) r); + + return m; +} + +static_always_inline uword +ip6_tcp_udp_address_x4_match (ip6_tcp_udp_address_x4_t * ax4, + ip6_header_t * ip, + tcp_header_t * tcp) +{ + u32x4 src0 = u32x4_splat (ip->src_address.as_u32[0]); + u32x4 src1 = u32x4_splat (ip->src_address.as_u32[1]); + u32x4 src2 = u32x4_splat (ip->src_address.as_u32[2]); + u32x4 src3 = u32x4_splat (ip->src_address.as_u32[3]); + u32x4 dst0 = u32x4_splat (ip->dst_address.as_u32[0]); + u32x4 dst1 = u32x4_splat (ip->dst_address.as_u32[1]); + u32x4 dst2 = u32x4_splat (ip->dst_address.as_u32[2]); + u32x4 dst3 = u32x4_splat (ip->dst_address.as_u32[3]); + u32x4 ports = u32x4_splat (tcp->ports.src_and_dst); + return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4, + src0, src1, src2, src3, + dst0, dst1, dst2, dst3, + ports)); +} + +static_always_inline uword +ip6_tcp_udp_address_x4_first_empty (ip6_tcp_udp_address_x4_t * ax4) +{ + u32x4 zero = {0}; + return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4, + zero, zero, zero, zero, + zero, zero, zero, zero, + zero)); +} + +static_always_inline uword +ip6_tcp_udp_address_x4_empty_mask (ip6_tcp_udp_address_x4_t * ax4) +{ + u32x4 zero = {0}; + return my_zero_mask (ip6_tcp_udp_address_x4_match_helper (ax4, + zero, zero, zero, zero, + zero, zero, zero, zero, + zero)); +} +#else /* TCP_HAVE_VEC128 */ +static_always_inline uword +ip6_tcp_udp_address_x4_match_helper (ip6_tcp_udp_address_x4_t * ax4, + u32 src0, u32 src1, u32 src2, u32 src3, + u32 dst0, u32 dst1, u32 dst2, u32 dst3, + u32 ports) +{ + u32 r0, r1, r2, r3; + +#define _(i) \ + r##i = (src0 == ax4->src.as_u32[i][0] \ + && src1 == ax4->src.as_u32[i][1] \ + && src2 == ax4->src.as_u32[i][2] \ + && src3 == ax4->src.as_u32[i][3] \ + && dst0 == ax4->dst.as_u32[i][0] \ + && dst1 == ax4->dst.as_u32[i][1] \ + && dst2 == ax4->dst.as_u32[i][2] \ + && dst3 == ax4->dst.as_u32[i][3] \ + && ports == ax4->ports.as_ports[i].as_u32) + + _ (0); + _ (1); + _ (2); + _ (3); + +#undef _ + + return (((r0 ? 0xf : 0x0) << 0) + | ((r1 ? 0xf : 0x0) << 4) + | ((r2 ? 0xf : 0x0) << 8) + | ((r3 ? 
0xf : 0x0) << 12)); +} + +static_always_inline uword +ip6_tcp_udp_address_x4_match (ip6_tcp_udp_address_x4_t * ax4, + ip6_header_t * ip, + tcp_header_t * tcp) +{ + u32 src0 = ip->src_address.as_u32[0]; + u32 src1 = ip->src_address.as_u32[1]; + u32 src2 = ip->src_address.as_u32[2]; + u32 src3 = ip->src_address.as_u32[3]; + u32 dst0 = ip->dst_address.as_u32[0]; + u32 dst1 = ip->dst_address.as_u32[1]; + u32 dst2 = ip->dst_address.as_u32[2]; + u32 dst3 = ip->dst_address.as_u32[3]; + u32 ports = tcp->ports.src_and_dst; + return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4, + src0, src1, src2, src3, + dst0, dst1, dst2, dst3, + ports)); +} + +static_always_inline uword +ip6_tcp_udp_address_x4_first_empty (ip6_tcp_udp_address_x4_t * ax4) +{ + return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0)); +} + +static_always_inline uword +ip6_tcp_udp_address_x4_empty_mask (ip6_tcp_udp_address_x4_t * ax4) +{ + return my_zero_mask (ip6_tcp_udp_address_x4_match_helper (ax4, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0)); +} +#endif /* ! TCP_HAVE_VEC128 */ + +static u8 * format_ip6_tcp_udp_address_x4 (u8 * s, va_list * va) +{ + ip6_tcp_udp_address_x4_t * a = va_arg (*va, ip6_tcp_udp_address_x4_t *); + u32 i, ai = va_arg (*va, u32); + ip6_address_t src, dst; + + ASSERT (ai < 4); + for (i = 0; i < 4; i++) + { + src.as_u32[i] = a->src.as_u32[i][ai]; + dst.as_u32[i] = a->dst.as_u32[i][ai]; + } + + s = format (s, "%U:%d -> %U:%d", + format_ip6_address, &src, + clib_net_to_host_u16 (a->ports.as_ports[ai].src), + format_ip6_address, &dst, + clib_net_to_host_u16 (a->ports.as_ports[ai].dst)); + + return s; +} + +static_always_inline u32 +find_oldest_timestamp_x4 (u32 * time_stamps, u32 now) +{ + u32 dt0, dt_min0, i_min0; + u32 dt1, dt_min1, i_min1; + + i_min0 = i_min1 = 0; + dt_min0 = now - time_stamps[0]; + dt_min1 = now - time_stamps[2]; + dt0 = now - time_stamps[1]; + dt1 = now - time_stamps[3]; + + i_min0 += dt0 > dt_min0; + i_min1 += dt1 > dt_min1; + + dt_min0 = i_min0 > 0 ? dt0 : dt_min0; + dt_min1 = i_min1 > 0 ? dt1 : dt_min1; + + return dt_min0 > dt_min1 ? 
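/* Slot ages are computed as (now - stamp) in unsigned arithmetic, so the
   comparison stays correct across tick-counter wrap; slots 0/1 and 2/3 are
   compared pairwise above, and the older of the two winners is returned. */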
i_min0 : (2 + i_min1); +} + +static_always_inline uword +tcp_round_trip_time_stats_is_valid (tcp_round_trip_time_stats_t * s) +{ return s->count > 0; } + +static_always_inline void +tcp_round_trip_time_stats_compute (tcp_round_trip_time_stats_t * s, f64 * r) +{ + f64 ave, rms; + ASSERT (s->count > 0); + ave = s->sum / s->count; + rms = sqrt (s->sum2 / s->count - ave*ave); + r[0] = ave; + r[1] = rms; +} + +typedef struct { + tcp_option_type_t type : 8; + u8 length; + u32 my_time_stamp, his_time_stamp; +} __attribute__ ((packed)) tcp_time_stamp_option_t; + +typedef struct { + tcp_header_t header; + + struct { + struct { + tcp_option_type_t type : 8; + u8 length; + u16 value; + } mss; + + struct { + tcp_option_type_t type : 8; + u8 length; + u8 value; + } __attribute__ ((packed)) window_scale; + + u8 nops[3]; + + tcp_time_stamp_option_t time_stamp; + } __attribute__ ((packed)) options; +} __attribute__ ((packed)) tcp_syn_packet_t; + +typedef struct { + tcp_header_t header; + + struct { + u8 nops[2]; + + tcp_time_stamp_option_t time_stamp; + } options; +} __attribute__ ((packed)) tcp_ack_packet_t; + +typedef struct { + ip4_header_t ip4; + tcp_syn_packet_t tcp; +} ip4_tcp_syn_packet_t; + +typedef struct { + ip4_header_t ip4; + tcp_ack_packet_t tcp; +} ip4_tcp_ack_packet_t; + +typedef struct { + ip6_header_t ip6; + tcp_syn_packet_t tcp; +} ip6_tcp_syn_packet_t; + +typedef struct { + ip6_header_t ip6; + tcp_ack_packet_t tcp; +} ip6_tcp_ack_packet_t; + +static_always_inline void +ip4_tcp_packet_init (ip4_header_t * ip, u32 n_bytes) +{ + ip->ip_version_and_header_length = 0x45; + + ip->tos = ip4_main.host_config.tos; + ip->ttl = ip4_main.host_config.ttl; + + /* No need to set fragment ID due to DF bit. */ + ip->flags_and_fragment_offset = clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + + ip->protocol = IP_PROTOCOL_TCP; + + ip->length = clib_host_to_net_u16 (n_bytes); + + ip->checksum = ip4_header_checksum (ip); +} + +static_always_inline void +ip6_tcp_packet_init (ip6_header_t * ip, u32 n_bytes) +{ + ip->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28); + + ip->payload_length = clib_host_to_net_u16 (n_bytes - sizeof (ip[0])); + + ip->hop_limit = ip6_main.host_config.ttl; +} + +static_always_inline u32 +tcp_time_now (tcp_main_t * tm, tcp_timer_type_t t) +{ + ASSERT (t < ARRAY_LEN (tm->log2_clocks_per_tick)); + return clib_cpu_time_now () >> tm->log2_clocks_per_tick[t]; +} + +static void +tcp_time_init (vlib_main_t * vm, tcp_main_t * tm) +{ + int i; + f64 log2 = .69314718055994530941; + + for (i = 0; i < ARRAY_LEN (tm->log2_clocks_per_tick); i++) + { + static f64 t[] = { +#define _(f,r) r, + foreach_tcp_timer +#undef _ + }; + tm->log2_clocks_per_tick[i] = + flt_round_nearest (log (t[i] / vm->clib_time.seconds_per_clock) / log2); + tm->secs_per_tick[i] = vm->clib_time.seconds_per_clock * (1 << tm->log2_clocks_per_tick[i]); + } +} + +tcp_main_t tcp_main; + +typedef enum { + TCP_LOOKUP_NEXT_DROP, + TCP_LOOKUP_NEXT_PUNT, + TCP_LOOKUP_NEXT_LISTEN_SYN, + TCP_LOOKUP_NEXT_LISTEN_ACK, + TCP_LOOKUP_NEXT_CONNECT_SYN_ACK, + TCP_LOOKUP_NEXT_ESTABLISHED, + TCP_LOOKUP_N_NEXT, +} tcp_lookup_next_t; + +#define foreach_tcp_error \ + _ (NONE, "no error") \ + _ (LOOKUP_DROPS, "lookup drops") \ + _ (LISTEN_RESPONSES, "listen responses sent") \ + _ (CONNECTS_SENT, "connects sent") \ + _ (LISTENS_ESTABLISHED, "listens connected") \ + _ (UNEXPECTED_SEQ_NUMBER, "unexpected sequence number drops") \ + _ (UNEXPECTED_ACK_NUMBER, "unexpected acknowledgment number drops") \ + _ 
(CONNECTS_ESTABLISHED, "connects established") \ + _ (NO_LISTENER_FOR_PORT, "no listener for port") \ + _ (WRONG_LOCAL_ADDRESS_FOR_PORT, "wrong local address for port") \ + _ (ACKS_SENT, "acks sent for established connections") \ + _ (NO_DATA, "acks with no data") \ + _ (FINS_RECEIVED, "fins received") \ + _ (SEGMENT_AFTER_FIN, "segments dropped after fin received") \ + _ (CONNECTIONS_CLOSED, "connections closed") + +typedef enum { +#define _(sym,str) TCP_ERROR_##sym, + foreach_tcp_error +#undef _ + TCP_N_ERROR, +} tcp_error_t; + +#ifdef TCP_HAVE_VEC128 +static_always_inline u32x4 u32x4_splat_x2 (u32 x) +{ + u32x4 r = u32x4_set0 (x); + return u32x4_interleave_lo (r, r); +} + +static_always_inline u32x4 u32x4_set_x2 (u32 x, u32 y) +{ + u32x4 r0 = u32x4_set0 (x); + u32x4 r1 = u32x4_set0 (y); + return u32x4_interleave_lo (r0, r1); +} + +/* FIXME */ +#define u32x4_get(x,i) \ + __builtin_ia32_vec_ext_v4si ((i32x4) (x), (int) (i)) +#else /* TCP_HAVE_VEC128 */ +#endif /* TCP_HAVE_VEC128 */ + +/* Dispatching on tcp/udp listeners (by dst port) + and tcp/udp connections (by src/dst address/port). */ +static_always_inline uword +ip46_tcp_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + uword is_ip6) +{ + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next, mini_now; + vlib_node_runtime_t * error_node = node; + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next = node->cached_next_index; + mini_now = tcp_time_now (tm, TCP_TIMER_mini_connection); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip60; + ip4_header_t * ip40; + tcp_header_t * tcp0; + u32 bi0, imin0, iest0, li0; + tcp_connection_state_t state0; + u8 error0, next0; + u8 min_match0, est_match0, is_min_match0, is_est_match0; + u8 min_oldest0, est_first_empty0; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + +#ifdef TCP_HAVE_VEC128 + { + u32x4 a0, b0, c0; + + a0 = tm->connection_hash_seeds[is_ip6][0].as_u32x4; + b0 = tm->connection_hash_seeds[is_ip6][1].as_u32x4; + c0 = tm->connection_hash_seeds[is_ip6][2].as_u32x4; + + if (is_ip6) + { + ip60 = vlib_buffer_get_current (p0); + tcp0 = ip6_next_header (ip60); + + a0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[0]); + b0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[1]); + c0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[2]); + + hash_v3_mix_u32x (a0, b0, c0); + + a0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[3]); + b0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[0]); + c0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[1]); + + hash_v3_mix_u32x (a0, b0, c0); + + a0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[2]); + b0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[3]); + c0 ^= u32x4_splat_x2 (tcp0->ports.src_and_dst); + } + else + { + ip40 = vlib_buffer_get_current (p0); + tcp0 = ip4_next_header (ip40); + + a0 ^= u32x4_splat_x2 (ip40->src_address.as_u32); + b0 ^= u32x4_splat_x2 (ip40->dst_address.as_u32); + c0 ^= u32x4_splat_x2 (tcp0->ports.src_and_dst); + } + + hash_v3_finalize_u32x (a0, b0, c0); + + c0 &= tm->connection_hash_masks[is_ip6].as_u32x4; + + imin0 = u32x4_get0 (c0); + iest0 = u32x4_get (c0, 1); + } +#else + { + u32 a00, a01, b00, b01, c00, c01; + + a00 = 
tm->connection_hash_seeds[is_ip6][0].as_u32[0]; + a01 = tm->connection_hash_seeds[is_ip6][0].as_u32[1]; + b00 = tm->connection_hash_seeds[is_ip6][1].as_u32[0]; + b01 = tm->connection_hash_seeds[is_ip6][1].as_u32[1]; + c00 = tm->connection_hash_seeds[is_ip6][2].as_u32[0]; + c01 = tm->connection_hash_seeds[is_ip6][2].as_u32[1]; + + if (is_ip6) + { + ip60 = vlib_buffer_get_current (p0); + tcp0 = ip6_next_header (ip60); + + a00 ^= ip60->src_address.as_u32[0]; + a01 ^= ip60->src_address.as_u32[0]; + b00 ^= ip60->src_address.as_u32[1]; + b01 ^= ip60->src_address.as_u32[1]; + c00 ^= ip60->src_address.as_u32[2]; + c01 ^= ip60->src_address.as_u32[2]; + + hash_v3_mix32 (a00, b00, c00); + hash_v3_mix32 (a01, b01, c01); + + a00 ^= ip60->src_address.as_u32[3]; + a01 ^= ip60->src_address.as_u32[3]; + b00 ^= ip60->dst_address.as_u32[0]; + b01 ^= ip60->dst_address.as_u32[0]; + c00 ^= ip60->dst_address.as_u32[1]; + c01 ^= ip60->dst_address.as_u32[1]; + + hash_v3_mix32 (a00, b00, c00); + hash_v3_mix32 (a01, b01, c01); + + a00 ^= ip60->dst_address.as_u32[2]; + a01 ^= ip60->dst_address.as_u32[2]; + b00 ^= ip60->dst_address.as_u32[3]; + b01 ^= ip60->dst_address.as_u32[3]; + c00 ^= tcp0->ports.src_and_dst; + c01 ^= tcp0->ports.src_and_dst; + } + else + { + ip40 = vlib_buffer_get_current (p0); + tcp0 = ip4_next_header (ip40); + + a00 ^= ip40->src_address.as_u32; + a01 ^= ip40->src_address.as_u32; + b00 ^= ip40->dst_address.as_u32; + b01 ^= ip40->dst_address.as_u32; + c00 ^= tcp0->ports.src_and_dst; + c01 ^= tcp0->ports.src_and_dst; + } + + hash_v3_finalize32 (a00, b00, c00); + hash_v3_finalize32 (a01, b01, c01); + + c00 &= tm->connection_hash_masks[is_ip6].as_u32[0]; + c01 &= tm->connection_hash_masks[is_ip6].as_u32[1]; + + imin0 = c00; + iest0 = c01; + } +#endif + + if (is_ip6) + { + ip6_tcp_udp_address_x4_and_timestamps_t * mina0; + ip6_tcp_udp_address_x4_t * esta0; + + mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0); + esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest0); + + min_match0 = ip6_tcp_udp_address_x4_match (&mina0->address_x4, ip60, tcp0); + est_match0 = ip6_tcp_udp_address_x4_match (esta0, ip60, tcp0); + + min_oldest0 = find_oldest_timestamp_x4 (mina0->time_stamps, mini_now); + est_first_empty0 = ip6_tcp_udp_address_x4_first_empty (esta0); + + if (PREDICT_FALSE (! est_match0 && est_first_empty0 >= 4 && ! min_match0)) + { + /* Lookup in overflow hash. */ + ASSERT (0); + } + } + else + { + ip4_tcp_udp_address_x4_and_timestamps_t * mina0; + ip4_tcp_udp_address_x4_t * esta0; + + mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0); + esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest0); + + min_match0 = ip4_tcp_udp_address_x4_match (&mina0->address_x4, ip40, tcp0); + est_match0 = ip4_tcp_udp_address_x4_match (esta0, ip40, tcp0); + + min_oldest0 = find_oldest_timestamp_x4 (mina0->time_stamps, mini_now); + est_first_empty0 = ip4_tcp_udp_address_x4_first_empty (esta0); + + if (PREDICT_FALSE (! est_match0 && est_first_empty0 >= 4 && ! min_match0)) + { + /* Lookup in overflow hash. */ + ASSERT (0); + } + } + + is_min_match0 = min_match0 < 4; + is_est_match0 = est_match0 < 4; + + imin0 = 4 * imin0 + (is_min_match0 ? min_match0 : min_oldest0); + iest0 = 4 * iest0 + (is_est_match0 ? est_match0 : est_first_empty0); + + /* Should simultaneously not match both in mini and established connection tables. */ + ASSERT (! 
(is_min_match0 && is_est_match0)); + + { + tcp_mini_connection_t * min0; + tcp_connection_t * est0; + tcp_sequence_pair_t * seq_pair0; + u8 flags0; + + min0 = vec_elt_at_index (tm46->mini_connections, imin0); + est0 = vec_elt_at_index (tm46->established_connections, iest0); + + if (min_match0 < 4) + { + ASSERT (min0->state != TCP_CONNECTION_STATE_unused); + ASSERT (min0->state != TCP_CONNECTION_STATE_established); + } + + seq_pair0 = is_min_match0 ? &min0->sequence_numbers : &est0->sequence_numbers; + + state0 = is_min_match0 ? min0->state : TCP_CONNECTION_STATE_unused; + state0 = is_est_match0 ? TCP_CONNECTION_STATE_established : state0; + + vnet_buffer (p0)->ip.tcp.established_connection_index = iest0; + vnet_buffer (p0)->ip.tcp.mini_connection_index = imin0; + vnet_buffer (p0)->ip.tcp.listener_index = li0 = tm->listener_index_by_dst_port[tcp0->ports.dst]; + + flags0 = tcp0->flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_FIN); + + next0 = tm->disposition_by_state_and_flags[state0][flags0].next; + error0 = tm->disposition_by_state_and_flags[state0][flags0].error; + + next0 = li0 != 0 ? next0 : TCP_LOOKUP_NEXT_PUNT; + error0 = li0 != 0 ? error0 : TCP_ERROR_NO_LISTENER_FOR_PORT; + } + + p0->error = error_node->errors[error0]; + + if (PREDICT_FALSE (next0 != next)) + { + to_next -= 1; + n_left_to_next += 1; + + vlib_put_next_frame (vm, node, next, n_left_to_next); + + next = next0; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + if (node->flags & VLIB_NODE_FLAG_TRACE) + /* FIXME */ ; + + return frame->n_vectors; +} + +static uword +ip4_tcp_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_lookup (vm, node, frame, /* is_ip6 */ 0); } + +static uword +ip6_tcp_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_lookup (vm, node, frame, /* is_ip6 */ 1); } + +static void +ip46_size_hash_tables (ip46_tcp_main_t * m) +{ + m->mini_connection_hash_mask = pow2_mask (m->log2_n_mini_connection_hash_elts); + vec_validate_aligned (m->mini_connections, + m->mini_connection_hash_mask, + CLIB_CACHE_LINE_BYTES); + + m->established_connection_hash_mask = pow2_mask (m->log2_n_established_connection_hash_elts); + vec_validate_aligned (m->established_connections, + m->established_connection_hash_mask, + CLIB_CACHE_LINE_BYTES); +} + +static void +ip46_tcp_lookup_init (vlib_main_t * vm, tcp_main_t * tm, int is_ip6) +{ + ip46_tcp_main_t * m = is_ip6 ? 
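/* Each ip4/ip6 address-hash entry is a 4-way bucket (one *_x4_t holds four
   connections), so the address vectors below are sized to hash_mask / 4 and
   lookup indices are later expanded as 4 * bucket + slot. */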
&tm->ip6 : &tm->ip4; + + m->is_ip6 = is_ip6; + + m->log2_n_mini_connection_hash_elts = 8; + m->log2_n_established_connection_hash_elts = 8; + ip46_size_hash_tables (m); + + if (is_ip6) + { + vec_validate_aligned (tm->ip6_mini_connection_address_hash, + m->mini_connection_hash_mask / 4, + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (tm->ip6_established_connection_address_hash, + m->established_connection_hash_mask / 4, + CLIB_CACHE_LINE_BYTES); + } + else + { + vec_validate_aligned (tm->ip4_mini_connection_address_hash, + m->mini_connection_hash_mask / 4, + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (tm->ip4_established_connection_address_hash, + m->established_connection_hash_mask / 4, + CLIB_CACHE_LINE_BYTES); + } + tm->connection_hash_masks[is_ip6].as_u32[0] = m->mini_connection_hash_mask / 4; + tm->connection_hash_masks[is_ip6].as_u32[1] = m->established_connection_hash_mask / 4; +} + +static void +tcp_lookup_init (vlib_main_t * vm, tcp_main_t * tm) +{ + int is_ip6; + + /* Initialize hash seeds. */ + for (is_ip6 = 0; is_ip6 < 2; is_ip6++) + { + u32 * r = clib_random_buffer_get_data (&vm->random_buffer, 3 * 2 * sizeof (r[0])); + tm->connection_hash_seeds[is_ip6][0].as_u32[0] = r[0]; + tm->connection_hash_seeds[is_ip6][0].as_u32[1] = r[1]; + tm->connection_hash_seeds[is_ip6][1].as_u32[0] = r[2]; + tm->connection_hash_seeds[is_ip6][1].as_u32[1] = r[3]; + tm->connection_hash_seeds[is_ip6][2].as_u32[0] = r[4]; + tm->connection_hash_seeds[is_ip6][2].as_u32[1] = r[5]; + + ip46_tcp_lookup_init (vm, tm, is_ip6); + } + + { + tcp_listener_t * l; + + pool_get_aligned (tm->listener_pool, l, CLIB_CACHE_LINE_BYTES); + + /* Null listener must always have zero index. */ + ASSERT (l - tm->listener_pool == 0); + + memset (l, 0, sizeof (l[0])); + + /* No adjacencies are valid. */ + l->valid_local_adjacency_bitmap = 0; + + vec_validate_init_empty (tm->listener_index_by_dst_port, + (1 << 16) - 1, + l - tm->listener_pool); + } + + /* Initialize disposition table. */ + { + int i, j; + for (i = 0; i < ARRAY_LEN (tm->disposition_by_state_and_flags); i++) + for (j = 0; j < ARRAY_LEN (tm->disposition_by_state_and_flags[i]); j++) + { + tm->disposition_by_state_and_flags[i][j].next = TCP_LOOKUP_NEXT_DROP; + tm->disposition_by_state_and_flags[i][j].error = TCP_ERROR_LOOKUP_DROPS; + } + +#define _(t,f,n,e) \ +do { \ + tm->disposition_by_state_and_flags[TCP_CONNECTION_STATE_##t][f].next = (n); \ + tm->disposition_by_state_and_flags[TCP_CONNECTION_STATE_##t][f].error = (e); \ +} while (0) + + /* SYNs for new connections -> tcp-listen. */ + _ (unused, TCP_FLAG_SYN, + TCP_LOOKUP_NEXT_LISTEN_SYN, TCP_ERROR_NONE); + _ (listen_ack_wait, TCP_FLAG_ACK, + TCP_LOOKUP_NEXT_LISTEN_ACK, TCP_ERROR_NONE); + _ (established, TCP_FLAG_ACK, + TCP_LOOKUP_NEXT_ESTABLISHED, TCP_ERROR_NONE); + _ (established, TCP_FLAG_FIN | TCP_FLAG_ACK, + TCP_LOOKUP_NEXT_ESTABLISHED, TCP_ERROR_NONE); + +#undef _ + } + + /* IP4 packet templates. 
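Both the ip4 and ip6 reply templates (SYN, SYN-ACK, ACK, FIN-ACK, RST-ACK) are built here with their TCP (and, for ip4, IP) checksums precomputed; the send paths then only fold the per-connection fields (addresses, ports, sequence numbers, MSS, timestamps) into the stored partial sums with ip_csum_add_even instead of recomputing full checksums.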
*/ + { + ip4_tcp_syn_packet_t ip4_syn, ip4_syn_ack; + ip4_tcp_ack_packet_t ip4_ack, ip4_fin_ack, ip4_rst_ack; + ip6_tcp_syn_packet_t ip6_syn, ip6_syn_ack; + ip6_tcp_ack_packet_t ip6_ack, ip6_fin_ack, ip6_rst_ack; + + memset (&ip4_syn, 0, sizeof (ip4_syn)); + memset (&ip4_syn_ack, 0, sizeof (ip4_syn_ack)); + memset (&ip4_ack, 0, sizeof (ip4_ack)); + memset (&ip4_fin_ack, 0, sizeof (ip4_fin_ack)); + memset (&ip4_rst_ack, 0, sizeof (ip4_rst_ack)); + memset (&ip6_syn, 0, sizeof (ip6_syn)); + memset (&ip6_syn_ack, 0, sizeof (ip6_syn_ack)); + memset (&ip6_ack, 0, sizeof (ip6_ack)); + memset (&ip6_fin_ack, 0, sizeof (ip6_fin_ack)); + memset (&ip6_rst_ack, 0, sizeof (ip6_rst_ack)); + + ip4_tcp_packet_init (&ip4_syn.ip4, sizeof (ip4_syn)); + ip4_tcp_packet_init (&ip4_syn_ack.ip4, sizeof (ip4_syn_ack)); + ip4_tcp_packet_init (&ip4_ack.ip4, sizeof (ip4_ack)); + ip4_tcp_packet_init (&ip4_fin_ack.ip4, sizeof (ip4_fin_ack)); + ip4_tcp_packet_init (&ip4_rst_ack.ip4, sizeof (ip4_rst_ack)); + + ip6_tcp_packet_init (&ip6_syn.ip6, sizeof (ip6_syn)); + ip6_tcp_packet_init (&ip6_syn_ack.ip6, sizeof (ip6_syn_ack)); + ip6_tcp_packet_init (&ip6_ack.ip6, sizeof (ip6_ack)); + ip6_tcp_packet_init (&ip6_fin_ack.ip6, sizeof (ip6_fin_ack)); + ip6_tcp_packet_init (&ip6_rst_ack.ip6, sizeof (ip6_rst_ack)); + + /* TCP header. */ + { + u8 window_scale = 7; + tcp_syn_packet_t * s = &ip4_syn.tcp; + tcp_syn_packet_t * sa = &ip4_syn_ack.tcp; + tcp_ack_packet_t * a = &ip4_ack.tcp; + tcp_ack_packet_t * fa = &ip4_fin_ack.tcp; + tcp_ack_packet_t * ra = &ip4_rst_ack.tcp; + + s->header.tcp_header_u32s_and_reserved = (sizeof (s[0]) / sizeof (u32)) << 4; + a->header.tcp_header_u32s_and_reserved = (sizeof (a[0]) / sizeof (u32)) << 4; + + s->header.flags = TCP_FLAG_SYN; + a->header.flags = TCP_FLAG_ACK; + + s->header.window = clib_host_to_net_u16 (32 << (10 - window_scale)); + a->header.window = s->header.window; + + s->options.mss.type = TCP_OPTION_MSS; + s->options.mss.length = 4; + + s->options.window_scale.type = TCP_OPTION_WINDOW_SCALE; + s->options.window_scale.length = 3; + s->options.window_scale.value = window_scale; + + s->options.time_stamp.type = TCP_OPTION_TIME_STAMP; + s->options.time_stamp.length = 10; + + memset (&s->options.nops, TCP_OPTION_NOP, sizeof (s->options.nops)); + + /* SYN-ACK is same as SYN but with ACK flag set. */ + sa[0] = s[0]; + sa->header.flags |= TCP_FLAG_ACK; + + a->options.time_stamp.type = TCP_OPTION_TIME_STAMP; + a->options.time_stamp.length = 10; + memset (&a->options.nops, TCP_OPTION_NOP, sizeof (a->options.nops)); + + /* {FIN,RST}-ACK are same as ACK but with {FIN,RST} flag set. */ + fa[0] = a[0]; + fa->header.flags |= TCP_FLAG_FIN; + ra[0] = a[0]; + ra->header.flags |= TCP_FLAG_RST; + + /* IP6 TCP headers are identical. */ + ip6_syn.tcp = s[0]; + ip6_syn_ack.tcp = sa[0]; + ip6_ack.tcp = a[0]; + ip6_fin_ack.tcp = fa[0]; + ip6_rst_ack.tcp = ra[0]; + + /* TCP checksums. 
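Each precomputed sum covers the template's TCP header and options plus the pseudo-header protocol and TCP length; the source and destination addresses are still zero at this point, so they contribute nothing here and are accounted for per packet later.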
*/ + { + ip_csum_t sum; + + sum = clib_host_to_net_u32 (sizeof (ip4_ack.tcp) + (ip4_ack.ip4.protocol << 16)); + sum = ip_incremental_checksum (sum, &ip4_ack.tcp, sizeof (ip4_ack.tcp)); + ip4_ack.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip4_fin_ack.tcp) + (ip4_fin_ack.ip4.protocol << 16)); + sum = ip_incremental_checksum (sum, &ip4_fin_ack.tcp, sizeof (ip4_fin_ack.tcp)); + ip4_fin_ack.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip4_rst_ack.tcp) + (ip4_rst_ack.ip4.protocol << 16)); + sum = ip_incremental_checksum (sum, &ip4_rst_ack.tcp, sizeof (ip4_rst_ack.tcp)); + ip4_rst_ack.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip4_syn.tcp) + (ip4_syn.ip4.protocol << 16)); + sum = ip_incremental_checksum (sum, &ip4_syn.tcp, sizeof (ip4_syn.tcp)); + ip4_syn.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip4_syn_ack.tcp) + (ip4_syn_ack.ip4.protocol << 16)); + sum = ip_incremental_checksum (sum, &ip4_syn_ack.tcp, sizeof (ip4_syn_ack.tcp)); + ip4_syn_ack.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip6_ack.tcp)) + ip6_ack.ip6.protocol; + sum = ip_incremental_checksum (sum, &ip6_ack.tcp, sizeof (ip6_ack.tcp)); + ip6_ack.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip6_fin_ack.tcp)) + ip6_fin_ack.ip6.protocol; + sum = ip_incremental_checksum (sum, &ip6_fin_ack.tcp, sizeof (ip6_fin_ack.tcp)); + ip6_fin_ack.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip6_rst_ack.tcp)) + ip6_rst_ack.ip6.protocol; + sum = ip_incremental_checksum (sum, &ip6_rst_ack.tcp, sizeof (ip6_rst_ack.tcp)); + ip6_rst_ack.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip6_syn.tcp)) + ip6_syn.ip6.protocol; + sum = ip_incremental_checksum (sum, &ip6_syn.tcp, sizeof (ip6_syn.tcp)); + ip6_syn.tcp.header.checksum = ~ ip_csum_fold (sum); + + sum = clib_host_to_net_u32 (sizeof (ip6_syn_ack.tcp)) + ip6_syn_ack.ip6.protocol; + sum = ip_incremental_checksum (sum, &ip6_syn_ack.tcp, sizeof (ip6_syn_ack.tcp)); + ip6_syn_ack.tcp.header.checksum = ~ ip_csum_fold (sum); + } + } + +#define _(t,x,n) \ +do { \ + vlib_packet_template_init \ + (vm, \ + &tm->ip4.packet_templates[t].vlib, \ + &x, sizeof (x), \ + /* alloc chunk size */ VLIB_FRAME_SIZE, \ + (n)); \ + tm->ip4.packet_templates[t].tcp_checksum_net_byte_order \ + = x.tcp.header.checksum; \ + tm->ip4.packet_templates[t].ip4_checksum_net_byte_order \ + = x.ip4.checksum; \ +} while (0) + + _ (TCP_PACKET_TEMPLATE_SYN, ip4_syn, "ip4 tcp syn"); + _ (TCP_PACKET_TEMPLATE_SYN_ACK, ip4_syn_ack, "ip4 tcp syn-ack"); + _ (TCP_PACKET_TEMPLATE_ACK, ip4_ack, "ip4 tcp ack"); + _ (TCP_PACKET_TEMPLATE_FIN_ACK, ip4_fin_ack, "ip4 tcp fin-ack"); + _ (TCP_PACKET_TEMPLATE_RST_ACK, ip4_rst_ack, "ip4 tcp rst-ack"); + +#undef _ + +#define _(t,x,n) \ +do { \ + vlib_packet_template_init \ + (vm, \ + &tm->ip6.packet_templates[t].vlib, \ + &x, sizeof (x), \ + /* alloc chunk size */ VLIB_FRAME_SIZE, \ + (n)); \ + tm->ip6.packet_templates[t].tcp_checksum_net_byte_order \ + = x.tcp.header.checksum; \ + tm->ip6.packet_templates[t].ip4_checksum_net_byte_order \ + = 0xdead; \ +} while (0) + + _ (TCP_PACKET_TEMPLATE_SYN, ip6_syn, "ip6 tcp syn"); + _ (TCP_PACKET_TEMPLATE_SYN_ACK, ip6_syn_ack, "ip6 tcp syn-ack"); + _ (TCP_PACKET_TEMPLATE_ACK, ip6_ack, "ip6 tcp ack"); + _ (TCP_PACKET_TEMPLATE_FIN_ACK, ip6_fin_ack, "ip6 tcp 
fin-ack"); + _ (TCP_PACKET_TEMPLATE_RST_ACK, ip6_rst_ack, "ip6 tcp rst-ack"); + +#undef _ + } +} + +static char * tcp_error_strings[] = { +#define _(sym,string) string, + foreach_tcp_error +#undef _ +}; + +VLIB_REGISTER_NODE (ip4_tcp_lookup_node,static) = { + .function = ip4_tcp_lookup, + .name = "ip4-tcp-lookup", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_LOOKUP_N_NEXT, + .next_nodes = { + [TCP_LOOKUP_NEXT_DROP] = "error-drop", + [TCP_LOOKUP_NEXT_PUNT] = "error-punt", + [TCP_LOOKUP_NEXT_LISTEN_SYN] = "ip4-tcp-listen", + [TCP_LOOKUP_NEXT_LISTEN_ACK] = "ip4-tcp-establish", + [TCP_LOOKUP_NEXT_CONNECT_SYN_ACK] = "ip4-tcp-connect", + [TCP_LOOKUP_NEXT_ESTABLISHED] = "ip4-tcp-established", + }, + + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, +}; + +VLIB_REGISTER_NODE (ip6_tcp_lookup_node,static) = { + .function = ip6_tcp_lookup, + .name = "ip6-tcp-lookup", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_LOOKUP_N_NEXT, + .next_nodes = { + [TCP_LOOKUP_NEXT_DROP] = "error-drop", + [TCP_LOOKUP_NEXT_PUNT] = "error-punt", + [TCP_LOOKUP_NEXT_LISTEN_SYN] = "ip6-tcp-listen", + [TCP_LOOKUP_NEXT_LISTEN_ACK] = "ip4-tcp-establish", + [TCP_LOOKUP_NEXT_CONNECT_SYN_ACK] = "ip6-tcp-connect", + [TCP_LOOKUP_NEXT_ESTABLISHED] = "ip6-tcp-established", + }, + + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, +}; + +static_always_inline void +tcp_options_decode_for_syn (tcp_main_t * tm, tcp_mini_connection_t * m, tcp_header_t * tcp) +{ + u8 * o = (void *) (tcp + 1); + u32 n_bytes = (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32); + u8 * e = o + n_bytes; + tcp_mini_connection_t * tmpl = &tm->option_decode_mini_connection_template; + tcp_option_type_t t; + u8 i, l, * p; + u8 * option_decode[16]; + + /* Initialize defaults. */ + option_decode[TCP_OPTION_MSS] = (u8 *) &tmpl->max_segment_size; + option_decode[TCP_OPTION_WINDOW_SCALE] = (u8 *) &tmpl->window_scale; + option_decode[TCP_OPTION_TIME_STAMP] = (u8 *) &tmpl->time_stamps.his_net_byte_order; + + if (n_bytes > 0) + { +#define _ \ +do { \ + t = o[0]; \ + i = t >= ARRAY_LEN (option_decode) ? TCP_OPTION_END : t; \ + option_decode[i] = o + 2; \ + /* Skip nop; don't skip end; else length from packet. */ \ + l = t < 2 ? t : o[1]; \ + p = o + l; \ + o = p < e ? p : o; \ +} while (0) + + _; _; _; + /* Fast path: NOP NOP TIMESTAMP. */ + if (o >= e) goto done; + _; _; + if (o >= e) goto done; + _; _; _; + +#undef _ + + done:; + } + + m->max_segment_size = + clib_net_to_host_u16 (*(u16 *) option_decode[TCP_OPTION_MSS]); + m->window_scale = *option_decode[TCP_OPTION_WINDOW_SCALE]; + m->time_stamps.his_net_byte_order = ((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[0]; +} + +static_always_inline u32 +tcp_options_decode_for_ack (tcp_main_t * tm, tcp_header_t * tcp, + u32 * his_time_stamp) +{ + u8 * o = (void *) (tcp + 1); + u32 n_bytes = (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32); + u8 * e = o + n_bytes; + tcp_option_type_t t; + u8 i, l, * p; + u8 * option_decode[16]; + u32 default_time_stamps[2]; + + /* Initialize defaults. */ + default_time_stamps[0] = default_time_stamps[1] = 0; + option_decode[TCP_OPTION_TIME_STAMP] = (u8 *) &default_time_stamps; + + if (n_bytes > 0) + { +#define _ \ +do { \ + t = o[0]; \ + i = t >= ARRAY_LEN (option_decode) ? TCP_OPTION_END : t; \ + option_decode[i] = o + 2; \ + /* Skip nop; don't skip end; else length from packet. */ \ + l = t < 2 ? t : o[1]; \ + p = o + l; \ + o = p < e ? p : o; \ +} while (0) + + _; _; _; + /* Fast path: NOP NOP TIMESTAMP. 
*/ + if (o >= e) goto done; + _; _; + if (o >= e) goto done; + _; _; _; +#undef _ + + done:; + } + + if (his_time_stamp) + his_time_stamp[0] = ((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[0]; + + return clib_net_to_host_u32 (((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[1]); +} + +static void +tcp_options_decode_init (tcp_main_t * tm) +{ + tcp_mini_connection_t * m = &tm->option_decode_mini_connection_template; + + memset (m, 0, sizeof (m[0])); + m->max_segment_size = clib_host_to_net_u16 (576 - 40); + m->window_scale = 0; + m->time_stamps.his_net_byte_order = 0; +} + +/* Initialize target buffer as "related" to given buffer. */ +always_inline void +vlib_buffer_copy_shared_fields (vlib_main_t * vm, vlib_buffer_t * b, u32 bi_target) +{ + vlib_buffer_t * b_target = vlib_get_buffer (vm, bi_target); + vnet_buffer (b_target)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_RX]; + b_target->trace_index = b->trace_index; + b_target->flags |= b->flags & VLIB_BUFFER_IS_TRACED; +} + +typedef enum { + TCP_LISTEN_NEXT_DROP, + TCP_LISTEN_NEXT_REPLY, + TCP_LISTEN_N_NEXT, +} tcp_listen_next_t; + +static_always_inline uword +ip46_tcp_listen (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + uword is_ip6) +{ + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + uword n_packets = frame->n_vectors; + u32 * from, * to_reply, * to_drop, * random_ack_numbers; + u32 n_left_from, n_left_to_reply, n_left_to_drop, mini_now, timestamp_now; + u16 * fid, * fragment_ids; + vlib_node_runtime_t * error_node; + + error_node = vlib_node_get_runtime + (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + mini_now = tcp_time_now (tm, TCP_TIMER_mini_connection); + timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp); + + random_ack_numbers = clib_random_buffer_get_data (&vm->random_buffer, + n_packets * sizeof (random_ack_numbers[0])); + /* Get random fragment IDs for replies. 
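A random 32-bit initial sequence number and a random 16-bit fragment id are reserved from the random buffer in one batch per frame, before the packet loop.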
*/ + fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer, + n_packets * sizeof (fragment_ids[0])); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, TCP_LISTEN_NEXT_REPLY, + to_reply, n_left_to_reply); + vlib_get_next_frame (vm, node, TCP_LISTEN_NEXT_DROP, + to_drop, n_left_to_drop); + + while (n_left_from > 0 && n_left_to_reply > 0 && n_left_to_drop > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip60; + ip4_header_t * ip40; + tcp_header_t * tcp0; + tcp_mini_connection_t * min0; + tcp_syn_packet_t * tcp_reply0; + ip_csum_t tcp_sum0; + u32 bi0, bi_reply0, imin0, my_seq_net0, his_seq_host0, his_seq_net0; + u8 i0; + + bi0 = to_drop[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_drop += 1; + n_left_to_drop -= 1; + + p0 = vlib_get_buffer (vm, bi0); + + p0->error = error_node->errors[TCP_ERROR_LISTEN_RESPONSES]; + + imin0 = vnet_buffer (p0)->ip.tcp.mini_connection_index; + i0 = imin0 % 4; + + if (is_ip6) + { + ip6_tcp_udp_address_x4_and_timestamps_t * mina0; + + ip60 = vlib_buffer_get_current (p0); + tcp0 = ip6_next_header (ip60); + + mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0 / 4); + + ip6_tcp_udp_address_x4_set_from_headers (&mina0->address_x4, + ip60, tcp0, i0); + mina0->time_stamps[i0] = mini_now; + } + else + { + ip4_tcp_udp_address_x4_and_timestamps_t * mina0; + + ip40 = vlib_buffer_get_current (p0); + tcp0 = ip4_next_header (ip40); + + mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0 / 4); + + ip4_tcp_udp_address_x4_set_from_headers (&mina0->address_x4, + ip40, tcp0, i0); + mina0->time_stamps[i0] = mini_now; + } + + min0 = vec_elt_at_index (tm46->mini_connections, imin0); + + min0->state = TCP_CONNECTION_STATE_listen_ack_wait; + min0->time_stamps.ours_host_byte_order = timestamp_now; + tcp_options_decode_for_syn (tm, min0, tcp0); + + my_seq_net0 = *random_ack_numbers++; + his_seq_host0 = 1 + clib_net_to_host_u32 (tcp0->seq_number); + + min0->sequence_numbers.ours = 1 + clib_net_to_host_u32 (my_seq_net0); + min0->sequence_numbers.his = his_seq_host0; + + if (is_ip6) + { + ip6_tcp_syn_packet_t * r0; + uword tmp0, i; + + r0 = vlib_packet_template_get_packet + (vm, + &tm->ip6.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK].vlib, + &bi_reply0); + tcp_reply0 = &r0->tcp; + + tcp_sum0 = (tm->ip6.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK] + .tcp_checksum_net_byte_order); + + for (i = 0; i < ARRAY_LEN (ip60->dst_address.as_uword); i++) + { + tmp0 = r0->ip6.src_address.as_uword[i] = ip60->dst_address.as_uword[i]; + tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0); + + tmp0 = r0->ip6.dst_address.as_uword[i] = ip60->src_address.as_uword[i]; + tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0); + } + } + else + { + ip4_tcp_syn_packet_t * r0; + ip_csum_t ip_sum0; + u32 src0, dst0; + + r0 = vlib_packet_template_get_packet + (vm, + &tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK].vlib, + &bi_reply0); + tcp_reply0 = &r0->tcp; + + tcp_sum0 = (tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK] + .tcp_checksum_net_byte_order); + ip_sum0 = (tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK] + .ip4_checksum_net_byte_order); + + src0 = r0->ip4.src_address.as_u32 = ip40->dst_address.as_u32; + dst0 = r0->ip4.dst_address.as_u32 = ip40->src_address.as_u32; + + ip_sum0 = ip_csum_add_even (ip_sum0, src0); + tcp_sum0 = ip_csum_add_even (tcp_sum0, src0); + + ip_sum0 = ip_csum_add_even (ip_sum0, dst0); + tcp_sum0 = ip_csum_add_even (tcp_sum0, dst0); + + r0->ip4.checksum = ip_csum_fold (ip_sum0); + + ASSERT (r0->ip4.checksum == 
ip4_header_checksum (&r0->ip4)); + } + + tcp_reply0->header.ports.src = tcp0->ports.dst; + tcp_reply0->header.ports.dst = tcp0->ports.src; + tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->header.ports.src_and_dst); + + tcp_reply0->header.seq_number = my_seq_net0; + tcp_sum0 = ip_csum_add_even (tcp_sum0, my_seq_net0); + + his_seq_net0 = clib_host_to_net_u32 (his_seq_host0); + tcp_reply0->header.ack_number = his_seq_net0; + tcp_sum0 = ip_csum_add_even (tcp_sum0, his_seq_net0); + + { + ip_adjacency_t * adj0 = ip_get_adjacency (&ip4_main.lookup_main, vnet_buffer (p0)->ip.adj_index[VLIB_RX]); + u16 my_mss = + (adj0->rewrite_header.max_l3_packet_bytes + - (is_ip6 ? sizeof (ip60[0]) : sizeof (ip40[0])) + - sizeof (tcp0[0])); + + my_mss = clib_min (my_mss, min0->max_segment_size); + min0->max_segment_size = my_mss; + + tcp_reply0->options.mss.value = clib_host_to_net_u16 (my_mss); + tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.mss.value); + } + + tcp_reply0->options.time_stamp.my_time_stamp = clib_host_to_net_u32 (timestamp_now); + tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.time_stamp.my_time_stamp); + + tcp_reply0->options.time_stamp.his_time_stamp = min0->time_stamps.his_net_byte_order; + tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.time_stamp.his_time_stamp); + + tcp_reply0->header.checksum = ip_csum_fold (tcp_sum0); + + vlib_buffer_copy_shared_fields (vm, p0, bi_reply0); + + to_reply[0] = bi_reply0; + n_left_to_reply -= 1; + to_reply += 1; + } + + vlib_put_next_frame (vm, node, TCP_LISTEN_NEXT_REPLY, n_left_to_reply); + vlib_put_next_frame (vm, node, TCP_LISTEN_NEXT_DROP, n_left_to_drop); + } + + if (node->flags & VLIB_NODE_FLAG_TRACE) + /* FIXME */ ; + + return frame->n_vectors; +} + +static uword +ip4_tcp_listen (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_listen (vm, node, frame, /* is_ip6 */ 0); } + +static uword +ip6_tcp_listen (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_listen (vm, node, frame, /* is_ip6 */ 1); } + +VLIB_REGISTER_NODE (ip4_tcp_listen_node,static) = { + .function = ip4_tcp_listen, + .name = "ip4-tcp-listen", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = { + [TCP_LISTEN_NEXT_DROP] = "error-drop", + [TCP_LISTEN_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup", + }, +}; + +VLIB_REGISTER_NODE (ip6_tcp_listen_node,static) = { + .function = ip6_tcp_listen, + .name = "ip6-tcp-listen", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = { + [TCP_LISTEN_NEXT_DROP] = "error-drop", + [TCP_LISTEN_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup", + }, +}; + +typedef enum { + TCP_CONNECT_NEXT_DROP, + TCP_CONNECT_NEXT_REPLY, + TCP_CONNECT_N_NEXT, +} tcp_connect_next_t; + +static_always_inline uword +ip46_tcp_connect (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + uword is_ip6) +{ + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next; + vlib_node_runtime_t * error_node; + + /* FIXME */ + clib_warning ("%p", tm46); + + error_node = vlib_node_get_runtime + (vm, is_ip6 ? 
ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip60; + ip4_header_t * ip40; + tcp_header_t * tcp0; + u32 bi0; + u8 error0, next0; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + + if (is_ip6) + { + ip60 = vlib_buffer_get_current (p0); + tcp0 = ip6_next_header (ip60); + } + else + { + ip40 = vlib_buffer_get_current (p0); + tcp0 = ip4_next_header (ip40); + } + + ASSERT (0); + + error0 = next0 = 0; + p0->error = error_node->errors[error0]; + + if (PREDICT_FALSE (next0 != next)) + { + to_next -= 1; + n_left_to_next += 1; + + vlib_put_next_frame (vm, node, next, n_left_to_next); + + next = next0; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + if (node->flags & VLIB_NODE_FLAG_TRACE) + /* FIXME */ ; + + return frame->n_vectors; +} + +static uword +ip4_tcp_connect (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_connect (vm, node, frame, /* is_ip6 */ 0); } + +static uword +ip6_tcp_connect (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_connect (vm, node, frame, /* is_ip6 */ 1); } + +VLIB_REGISTER_NODE (ip4_tcp_connect_node,static) = { + .function = ip4_tcp_connect, + .name = "ip4-tcp-connect", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_CONNECT_N_NEXT, + .next_nodes = { + [TCP_CONNECT_NEXT_DROP] = "error-drop", + [TCP_CONNECT_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup", + }, +}; + +VLIB_REGISTER_NODE (ip6_tcp_connect_node,static) = { + .function = ip6_tcp_connect, + .name = "ip6-tcp-connect", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_CONNECT_N_NEXT, + .next_nodes = { + [TCP_CONNECT_NEXT_DROP] = "error-drop", + [TCP_CONNECT_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup", + }, +}; + +typedef enum { + TCP_ESTABLISH_NEXT_DROP, + TCP_ESTABLISH_NEXT_ESTABLISHED, + TCP_ESTABLISH_N_NEXT, +} tcp_establish_next_t; + +static_always_inline uword +ip46_tcp_establish (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + uword is_ip6) +{ + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next, mini_long_long_ago, timestamp_now; + vlib_node_runtime_t * error_node; + + error_node = vlib_node_get_runtime + (vm, is_ip6 ? 
ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next = node->cached_next_index; + mini_long_long_ago = + (tcp_time_now (tm, TCP_TIMER_mini_connection) + + (1 << (BITS (mini_long_long_ago) - 1))); + timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip60; + ip4_header_t * ip40; + tcp_header_t * tcp0; + tcp_mini_connection_t * min0; + tcp_connection_t * est0; + tcp_listener_t * l0; + u32 bi0, imin0, iest0; + u8 error0, next0, i0, e0; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + + imin0 = vnet_buffer (p0)->ip.tcp.mini_connection_index; + iest0 = vnet_buffer (p0)->ip.tcp.established_connection_index; + + i0 = imin0 % 4; + e0 = iest0 % 4; + + min0 = vec_elt_at_index (tm46->mini_connections, imin0); + if (PREDICT_FALSE (min0->state == TCP_CONNECTION_STATE_unused)) + goto already_established0; + min0->state = TCP_CONNECTION_STATE_unused; + + if (is_ip6) + { + ip60 = vlib_buffer_get_current (p0); + tcp0 = ip6_next_header (ip60); + } + else + { + ip40 = vlib_buffer_get_current (p0); + tcp0 = ip4_next_header (ip40); + } + + if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->seq_number) + != min0->sequence_numbers.his)) + goto unexpected_seq_number0; + if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->ack_number) + != min0->sequence_numbers.ours)) + goto unexpected_ack_number0; + + if (is_ip6) + { + ip6_tcp_udp_address_x4_and_timestamps_t * mina0; + ip6_tcp_udp_address_x4_t * esta0; + + mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0 / 4); + esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest0 / 4); + + ip6_tcp_udp_address_x4_copy_and_invalidate (esta0, &mina0->address_x4, e0, i0); + + mina0->time_stamps[i0] = mini_long_long_ago; + } + else + { + ip4_tcp_udp_address_x4_and_timestamps_t * mina0; + ip4_tcp_udp_address_x4_t * esta0; + + mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0 / 4); + esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest0 / 4); + + ip4_tcp_udp_address_x4_copy_and_invalidate (esta0, &mina0->address_x4, e0, i0); + + mina0->time_stamps[i0] = mini_long_long_ago; + } + + est0 = vec_elt_at_index (tm46->established_connections, iest0); + + est0->sequence_numbers = min0->sequence_numbers; + est0->max_segment_size = (min0->max_segment_size + - STRUCT_SIZE_OF (tcp_ack_packet_t, options)); + est0->his_window_scale = min0->window_scale; + est0->his_window = clib_net_to_host_u16 (tcp0->window); + est0->time_stamps.ours_host_byte_order = min0->time_stamps.ours_host_byte_order; + + /* Compute first measurement of round trip time. 
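The ACK echoes the timestamp carried in our SYN-ACK, so (timestamp_now - echoed value), scaled by the timestamp tick length, yields the first RTT sample; it seeds sum, sum-of-squares and count for the later mean/RMS computation in tcp_round_trip_time_stats_compute.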
*/ + { + u32 t = tcp_options_decode_for_ack (tm, tcp0, &est0->time_stamps.his_net_byte_order); + f64 dt = (timestamp_now - t) * tm->secs_per_tick[TCP_TIMER_timestamp]; + est0->round_trip_time_stats.sum = dt; + est0->round_trip_time_stats.sum2 = dt*dt; + est0->round_trip_time_stats.count = 1; + + { + ELOG_TYPE_DECLARE (e) = { + .format = "establish ack rtt: %.4e", + .format_args = "f8", + }; + struct { f64 dt; } * ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->dt = dt; + } + } + + est0->my_window_scale = 7; + est0->my_window = 256; + + l0 = pool_elt_at_index (tm->listener_pool, vnet_buffer (p0)->ip.tcp.listener_index); + vec_add1 (l0->event_connections[is_ip6], tcp_connection_handle_set (iest0, is_ip6)); + + next0 = TCP_ESTABLISH_NEXT_DROP; + error0 = TCP_ERROR_LISTENS_ESTABLISHED; + + enqueue0: + p0->error = error_node->errors[error0]; + if (PREDICT_FALSE (next0 != next)) + { + to_next -= 1; + n_left_to_next += 1; + + vlib_put_next_frame (vm, node, next, n_left_to_next); + + next = next0; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + continue; + + already_established0: + next0 = TCP_ESTABLISH_NEXT_ESTABLISHED; + error0 = TCP_ERROR_NONE; + goto enqueue0; + + unexpected_seq_number0: + next0 = TCP_ESTABLISH_NEXT_DROP; + error0 = TCP_ERROR_UNEXPECTED_SEQ_NUMBER; + goto enqueue0; + + unexpected_ack_number0: + next0 = TCP_ESTABLISH_NEXT_DROP; + error0 = TCP_ERROR_UNEXPECTED_ACK_NUMBER; + goto enqueue0; + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + if (node->flags & VLIB_NODE_FLAG_TRACE) + /* FIXME */ ; + + /* Inform listeners of new connections. */ + { + tcp_listener_t * l; + uword n; + pool_foreach (l, tm->listener_pool, ({ + if ((n = vec_len (l->event_connections[is_ip6])) > 0) + { + if (l->event_function) + l->event_function (l->event_connections[is_ip6], + TCP_EVENT_connection_established); + if (tm->n_established_connections[is_ip6] == 0) + vlib_node_set_state (vm, tm46->output_node_index, VLIB_NODE_STATE_POLLING); + tm->n_established_connections[is_ip6] += n; + _vec_len (l->event_connections[is_ip6]) = 0; + } + })); + } + + return frame->n_vectors; +} + +static uword +ip4_tcp_establish (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_establish (vm, node, frame, /* is_ip6 */ 0); } + +static uword +ip6_tcp_establish (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_establish (vm, node, frame, /* is_ip6 */ 1); } + +VLIB_REGISTER_NODE (ip4_tcp_establish_node,static) = { + .function = ip4_tcp_establish, + .name = "ip4-tcp-establish", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_ESTABLISH_N_NEXT, + .next_nodes = { + [TCP_ESTABLISH_NEXT_DROP] = "error-drop", + [TCP_ESTABLISH_NEXT_ESTABLISHED] = "ip4-tcp-established", + }, +}; + +VLIB_REGISTER_NODE (ip6_tcp_establish_node,static) = { + .function = ip6_tcp_establish, + .name = "ip6-tcp-establish", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_ESTABLISH_N_NEXT, + .next_nodes = { + [TCP_ESTABLISH_NEXT_DROP] = "error-drop", + [TCP_ESTABLISH_NEXT_ESTABLISHED] = "ip6-tcp-established", + }, +}; + +static_always_inline void +tcp_free_connection_x1 (vlib_main_t * vm, tcp_main_t * tm, + tcp_ip_4_or_6_t is_ip6, + u32 iest0) +{ + ip46_tcp_main_t * tm46 = is_ip6 ? 
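+  /* Freeing an established connection means invalidating its slot in the
+     4-wide address hash; the tcp_connection_t entry in
+     established_connections is simply left in place. */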
&tm->ip6 : &tm->ip4; + tcp_connection_t * est0; + u32 iest_div0, iest_mod0; + + iest_div0 = iest0 / 4; + iest_mod0 = iest0 % 4; + + if (is_ip6) + { + ip6_tcp_udp_address_x4_t * esta0; + esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div0); + ip6_tcp_udp_address_x4_invalidate (esta0, iest_mod0); + } + else + { + ip4_tcp_udp_address_x4_t * esta0; + esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div0); + ip4_tcp_udp_address_x4_invalidate (esta0, iest_mod0); + } + + est0 = vec_elt_at_index (tm46->established_connections, iest0); +} + +static_always_inline void +tcp_free_connection_x2 (vlib_main_t * vm, tcp_main_t * tm, + tcp_ip_4_or_6_t is_ip6, + u32 iest0, u32 iest1) +{ + tcp_free_connection_x1 (vm, tm, is_ip6, iest0); + tcp_free_connection_x1 (vm, tm, is_ip6, iest1); +} + +static_always_inline uword +ip46_tcp_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + tcp_ip_4_or_6_t is_ip6) +{ + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + u32 * cis, * to_next, n_left_to_next, n_connections_left; + u32 timestamp_now_host_byte_order, timestamp_now_net_byte_order; + vlib_node_runtime_t * error_node; + const u32 next = 0; + uword n_acks; + + /* Inform listeners of new connections. */ + { + tcp_listener_t * l; + pool_foreach (l, tm->listener_pool, ({ + if (vec_len (l->eof_connections) > 0) + { + if (l->event_function) + l->event_function (l->eof_connections[is_ip6], TCP_EVENT_fin_received); + else + { + uword i; + for (i = 0; i < vec_len (l->eof_connections[is_ip6]); i++) + { + tcp_connection_t * c = tcp_get_connection (l->eof_connections[is_ip6][i]); + c->flags |= TCP_CONNECTION_FLAG_application_requested_close; + } + } + _vec_len (l->eof_connections[is_ip6]) = 0; + } + + if (vec_len (l->close_connections[is_ip6]) > 0) + { + uword n_left; + u32 * cis; + + if (l->event_function) + l->event_function (l->close_connections[is_ip6], TCP_EVENT_connection_closed); + + cis = l->close_connections[is_ip6]; + n_left = vec_len (cis); + ASSERT (tm->n_established_connections[is_ip6] >= n_left); + tm->n_established_connections[is_ip6] -= n_left; + if (tm->n_established_connections[is_ip6] == 0) + vlib_node_set_state (vm, tm46->output_node_index, VLIB_NODE_STATE_DISABLED); + while (n_left >= 2) + { + tcp_free_connection_x2 (vm, tm, is_ip6, cis[0], cis[1]); + n_left -= 2; + cis += 2; + } + + while (n_left > 0) + { + tcp_free_connection_x1 (vm, tm, is_ip6, cis[0]); + n_left -= 1; + cis += 1; + } + + _vec_len (l->close_connections[is_ip6]) = 0; + } + })); + } + + n_acks = 0; + cis = tm46->connections_pending_acks; + n_connections_left = vec_len (cis); + if (n_connections_left == 0) + return n_acks; + _vec_len (tm46->connections_pending_acks) = 0; + error_node = vlib_node_get_runtime + (vm, is_ip6 ? 
ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index); + + timestamp_now_host_byte_order = tcp_time_now (tm, TCP_TIMER_timestamp); + timestamp_now_net_byte_order = clib_host_to_net_u32 (timestamp_now_host_byte_order); + + while (n_connections_left > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_connections_left > 0 && n_left_to_next > 0) + { + tcp_connection_t * est0; + tcp_ack_packet_t * tcp0; + tcp_udp_ports_t * ports0; + ip_csum_t tcp_sum0; + tcp_packet_template_type_t template_type0; + u32 bi0, iest0, iest_div0, iest_mod0, my_seq_net0, his_seq_net0; + u8 is_fin0; + + iest0 = cis[0]; + cis += 1; + iest_div0 = iest0 / 4; + iest_mod0 = iest0 % 4; + est0 = vec_elt_at_index (tm46->established_connections, iest0); + + /* Send a FIN along with our ACK if application closed connection. */ + { + u8 is_closed0, fin_sent0; + + is_closed0 = (est0->flags & TCP_CONNECTION_FLAG_application_requested_close) != 0; + fin_sent0 = (est0->flags & TCP_CONNECTION_FLAG_fin_sent) != 0; + + is_fin0 = is_closed0 && ! fin_sent0; + template_type0 = + (is_fin0 + ? TCP_PACKET_TEMPLATE_FIN_ACK + : TCP_PACKET_TEMPLATE_ACK); + est0->flags |= is_closed0 << LOG2_TCP_CONNECTION_FLAG_fin_sent; + } + + if (is_ip6) + { + ip6_tcp_ack_packet_t * r0; + ip6_tcp_udp_address_x4_t * esta0; + uword tmp0, i; + + esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div0); + r0 = vlib_packet_template_get_packet + (vm, &tm->ip6.packet_templates[template_type0].vlib, &bi0); + tcp0 = &r0->tcp; + + tcp_sum0 = (tm->ip6.packet_templates[template_type0] + .tcp_checksum_net_byte_order); + + for (i = 0; i < ARRAY_LEN (r0->ip6.src_address.as_u32); i++) + { + tmp0 = r0->ip6.src_address.as_u32[i] = esta0->dst.as_u32[i][iest_mod0]; + tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0); + + tmp0 = r0->ip6.dst_address.as_u32[i] = esta0->src.as_u32[i][iest_mod0]; + tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0); + } + + ports0 = &esta0->ports.as_ports[iest_mod0]; + } + else + { + ip4_tcp_ack_packet_t * r0; + ip4_tcp_udp_address_x4_t * esta0; + ip_csum_t ip_sum0; + u32 src0, dst0; + + esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div0); + r0 = vlib_packet_template_get_packet + (vm, &tm->ip4.packet_templates[template_type0].vlib, &bi0); + tcp0 = &r0->tcp; + + ip_sum0 = (tm->ip4.packet_templates[template_type0] + .ip4_checksum_net_byte_order); + tcp_sum0 = (tm->ip4.packet_templates[template_type0] + .tcp_checksum_net_byte_order); + + src0 = r0->ip4.src_address.as_u32 = esta0->dst.as_ip4_address[iest_mod0].as_u32; + dst0 = r0->ip4.dst_address.as_u32 = esta0->src.as_ip4_address[iest_mod0].as_u32; + + ip_sum0 = ip_csum_add_even (ip_sum0, src0); + tcp_sum0 = ip_csum_add_even (tcp_sum0, src0); + + ip_sum0 = ip_csum_add_even (ip_sum0, dst0); + tcp_sum0 = ip_csum_add_even (tcp_sum0, dst0); + + r0->ip4.checksum = ip_csum_fold (ip_sum0); + + ASSERT (r0->ip4.checksum == ip4_header_checksum (&r0->ip4)); + ports0 = &esta0->ports.as_ports[iest_mod0]; + } + + tcp_sum0 = ip_csum_add_even (tcp_sum0, ports0->as_u32); + tcp0->header.ports.src = ports0->dst; + tcp0->header.ports.dst = ports0->src; + + my_seq_net0 = clib_host_to_net_u32 (est0->sequence_numbers.ours); + his_seq_net0 = clib_host_to_net_u32 (est0->sequence_numbers.his); + + /* FIN accounts for 1 sequence number. 
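+	        A FIN occupies one unit of sequence space (RFC 793), so bump our
+	        sequence number whenever this packet carries one.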
*/ + est0->sequence_numbers.ours += is_fin0; + + tcp0->header.seq_number = my_seq_net0; + tcp_sum0 = ip_csum_add_even (tcp_sum0, my_seq_net0); + + tcp0->header.ack_number = his_seq_net0; + tcp_sum0 = ip_csum_add_even (tcp_sum0, his_seq_net0); + + est0->time_stamps.ours_host_byte_order = timestamp_now_host_byte_order; + tcp0->options.time_stamp.my_time_stamp = timestamp_now_net_byte_order; + tcp_sum0 = ip_csum_add_even (tcp_sum0, timestamp_now_net_byte_order); + + tcp0->options.time_stamp.his_time_stamp = est0->time_stamps.his_net_byte_order; + tcp_sum0 = ip_csum_add_even (tcp_sum0, est0->time_stamps.his_net_byte_order); + + tcp0->header.checksum = ip_csum_fold (tcp_sum0); + + est0->flags &= ~TCP_CONNECTION_FLAG_ack_pending; + + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + n_connections_left -= 1; + n_acks += 1; + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + vlib_error_count (vm, error_node->node_index, TCP_ERROR_ACKS_SENT, n_acks); + + return n_acks; +} + +static uword +ip4_tcp_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_output (vm, node, frame, /* is_ip6 */ 0); } + +static uword +ip6_tcp_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_output (vm, node, frame, /* is_ip6 */ 1); } + +VLIB_REGISTER_NODE (ip4_tcp_output_node,static) = { + .function = ip4_tcp_output, + .name = "ip4-tcp-output", + .state = VLIB_NODE_STATE_DISABLED, + .type = VLIB_NODE_TYPE_INPUT, + + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup", + }, +}; + +VLIB_REGISTER_NODE (ip6_tcp_output_node,static) = { + .function = ip6_tcp_output, + .name = "ip6-tcp-output", + .state = VLIB_NODE_STATE_DISABLED, + .type = VLIB_NODE_TYPE_INPUT, + + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup", + }, +}; + +static_always_inline void +tcp_ack (tcp_main_t * tm, tcp_connection_t * c, u32 n_bytes) +{ + ASSERT (n_bytes == 0); +} + +typedef enum { + TCP_ESTABLISHED_NEXT_DROP, + TCP_ESTABLISHED_N_NEXT, +} tcp_established_next_t; + +static_always_inline uword +ip46_tcp_established (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + tcp_ip_4_or_6_t is_ip6) +{ + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + uword n_packets = frame->n_vectors; + u32 * from, * to_next; + u32 n_left_from, n_left_to_next, next, timestamp_now; + vlib_node_runtime_t * error_node; + + error_node = vlib_node_get_runtime + (vm, is_ip6 ? 
ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next = node->cached_next_index; + timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + ip6_header_t * ip60; + ip4_header_t * ip40; + tcp_header_t * tcp0; + tcp_connection_t * est0; + tcp_listener_t * l0; + u32 bi0, iest0, n_data_bytes0, his_ack_host0, n_ack0; + u8 error0, next0, n_advance_bytes0, is_fin0, send_ack0; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + + if (is_ip6) + { + ip60 = vlib_buffer_get_current (p0); + tcp0 = ip6_next_header (ip60); + ASSERT (ip60->protocol == IP_PROTOCOL_TCP); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + else + { + ip40 = vlib_buffer_get_current (p0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) - n_advance_bytes0; + } + + iest0 = vnet_buffer (p0)->ip.tcp.established_connection_index; + est0 = vec_elt_at_index (tm46->established_connections, iest0); + + error0 = TCP_ERROR_NO_DATA; + next0 = TCP_ESTABLISHED_NEXT_DROP; + + if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->seq_number) + != est0->sequence_numbers.his)) + goto unexpected_seq_number0; + if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->ack_number) - est0->sequence_numbers.ours + > est0->n_tx_unacked_bytes)) + goto unexpected_ack_number0; + + is_fin0 = (tcp0->flags & TCP_FLAG_FIN) != 0; + + if (PREDICT_FALSE ((est0->flags & TCP_CONNECTION_FLAG_fin_received) + && (is_fin0 || n_data_bytes0 > 0))) + goto already_received_fin0; + + /* Update window. */ + est0->his_window = clib_net_to_host_u16 (tcp0->window); + + /* Update his sequence number to account for data he's just sent. */ + est0->sequence_numbers.his += n_data_bytes0 + is_fin0; + + his_ack_host0 = clib_net_to_host_u32 (tcp0->ack_number); + n_ack0 = his_ack_host0 - est0->sequence_numbers.ours; + tcp_ack (tm, est0, n_ack0); + est0->sequence_numbers.ours = his_ack_host0; + + { + u32 t = tcp_options_decode_for_ack (tm, tcp0, &est0->time_stamps.his_net_byte_order); + if (t != est0->time_stamps.ours_host_byte_order) + { + f64 dt = (timestamp_now - t) * tm->secs_per_tick[TCP_TIMER_timestamp]; + est0->round_trip_time_stats.sum += dt; + est0->round_trip_time_stats.sum2 += dt*dt; + est0->round_trip_time_stats.count += 1; + est0->time_stamps.ours_host_byte_order = t; + + { + ELOG_TYPE_DECLARE (e) = { + .format = "ack rtt: %.4e", + .format_args = "f8", + }; + struct { f64 dt; } * ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->dt = dt; + } + } + } + + send_ack0 = ((est0->flags & TCP_CONNECTION_FLAG_ack_pending) == 0 + && (n_data_bytes0 > 0 || is_fin0)); + vec_add1 (tm46->connections_pending_acks, vnet_buffer (p0)->ip.tcp.established_connection_index); + _vec_len (tm46->connections_pending_acks) -= ! 
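+	      /* The connection index was appended unconditionally above; when no
+	         ACK is actually due, roll the vector length back rather than
+	         branching around the vec_add1. */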
send_ack0; + est0->flags |= send_ack0 << LOG2_TCP_CONNECTION_FLAG_ack_pending; + + est0->flags |= is_fin0 << LOG2_TCP_CONNECTION_FLAG_fin_received; + + l0 = pool_elt_at_index (tm->listener_pool, vnet_buffer (p0)->ip.tcp.listener_index); + + { + u32 ch0 = tcp_connection_handle_set (iest0, is_ip6); + + vec_add1 (l0->eof_connections[is_ip6], ch0); + _vec_len (l0->eof_connections[is_ip6]) -= ! is_fin0; + + vec_add1 (l0->close_connections[is_ip6], ch0); + _vec_len (l0->close_connections[is_ip6]) -= !(est0->flags & TCP_CONNECTION_FLAG_fin_sent); + } + + next0 = n_data_bytes0 > 0 ? l0->next_index : next0; + + vlib_buffer_advance (p0, n_advance_bytes0); + + enqueue0: + p0->error = error_node->errors[error0]; + if (PREDICT_FALSE (next0 != next)) + { + to_next -= 1; + n_left_to_next += 1; + + vlib_put_next_frame (vm, node, next, n_left_to_next); + + next = next0; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + continue; + + unexpected_seq_number0: + next0 = TCP_ESTABLISHED_NEXT_DROP; + error0 = TCP_ERROR_UNEXPECTED_SEQ_NUMBER; + goto enqueue0; + + unexpected_ack_number0: + next0 = TCP_ESTABLISHED_NEXT_DROP; + error0 = TCP_ERROR_UNEXPECTED_ACK_NUMBER; + goto enqueue0; + + already_received_fin0: + next0 = TCP_ESTABLISHED_NEXT_DROP; + error0 = TCP_ERROR_SEGMENT_AFTER_FIN; + goto enqueue0; + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + if (node->flags & VLIB_NODE_FLAG_TRACE) + /* FIXME */ ; + + return frame->n_vectors; +} + +static uword +ip4_tcp_established (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_established (vm, node, frame, /* is_ip6 */ 0); } + +static uword +ip6_tcp_established (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ return ip46_tcp_established (vm, node, frame, /* is_ip6 */ 1); } + +VLIB_REGISTER_NODE (ip4_tcp_established_node,static) = { + .function = ip4_tcp_established, + .name = "ip4-tcp-established", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = { + [TCP_ESTABLISHED_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ip6_tcp_established_node,static) = { + .function = ip6_tcp_established, + .name = "ip6-tcp-established", + + .vector_size = sizeof (u32), + + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = { + [TCP_ESTABLISHED_NEXT_DROP] = "error-drop", + }, +}; + +uword +tcp_register_listener (vlib_main_t * vm, + tcp_listener_registration_t * r) +{ + tcp_main_t * tm = &tcp_main; + tcp_listener_t * l; + + { + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, tcp_udp_lookup_init))) + clib_error_report (error); + } + + pool_get_aligned (tm->listener_pool, l, CLIB_CACHE_LINE_BYTES); + + memset (l, 0, sizeof (l[0])); + + l->dst_port = r->port; + l->next_index = vlib_node_add_next (vm, ip4_tcp_established_node.index, r->data_node_index); + l->valid_local_adjacency_bitmap = 0; + l->flags = r->flags & (TCP_LISTENER_IP4 | TCP_LISTENER_IP6); + + tm->listener_index_by_dst_port[clib_host_to_net_u16 (l->dst_port)] = l - tm->listener_pool; + + return l - tm->listener_pool; +} + +static void +tcp_udp_lookup_ip4_add_del_interface_address (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete) +{ + tcp_main_t * tm = &tcp_main; + + tm->ip4.default_valid_local_adjacency_bitmap + = clib_bitmap_set (tm->ip4.default_valid_local_adjacency_bitmap, + 
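+       /* Keep the default listen bitmap in sync with interface address
+          adds/deletes; listeners using the default bitmap accept
+          connections on any configured local address. */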
if_address_index, + is_delete ? 0 : 1); +} + +static void +tcp_udp_lookup_ip6_add_del_interface_address (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete) +{ + tcp_main_t * tm = &tcp_main; + + tm->ip6.default_valid_local_adjacency_bitmap + = clib_bitmap_set (tm->ip6.default_valid_local_adjacency_bitmap, + if_address_index, + is_delete ? 0 : 1); +} + +static clib_error_t * +tcp_udp_lookup_init (vlib_main_t * vm) +{ + tcp_main_t * tm = &tcp_main; + ip4_main_t * im4 = &ip4_main; + ip6_main_t * im6 = &ip6_main; + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + tcp_time_init (vm, tm); + + { + ip4_add_del_interface_address_callback_t cb; + + cb.function = tcp_udp_lookup_ip4_add_del_interface_address; + cb.function_opaque = 0; + vec_add1 (im4->add_del_interface_address_callbacks, cb); + } + + { + ip6_add_del_interface_address_callback_t cb; + + cb.function = tcp_udp_lookup_ip6_add_del_interface_address; + cb.function_opaque = 0; + vec_add1 (im6->add_del_interface_address_callbacks, cb); + } + + tm->ip4.output_node_index = ip4_tcp_output_node.index; + tm->ip6.output_node_index = ip6_tcp_output_node.index; + + tcp_lookup_init (vm, tm); + tcp_options_decode_init (tm); + + tm->tx_buffer_free_list = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX; + tm->tx_buffer_free_list_n_buffer_bytes = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + + return 0; +} + +VLIB_INIT_FUNCTION (tcp_udp_lookup_init); + +static u8 * format_tcp_time_stamp (u8 * s, va_list * va) +{ + tcp_timer_type_t type = va_arg (*va, tcp_timer_type_t); + u32 value = va_arg (*va, u32); + vlib_main_t * vm = vlib_get_main(); + tcp_main_t * tm = &tcp_main; + u64 now; + f64 dt; + + now = clib_cpu_time_now (); + dt = vm->clib_time.seconds_per_clock * (now - (value << tm->log2_clocks_per_tick[type])); + return format (s, "%.4e sec", dt); +} + +static u8 * format_tcp_connection_state (u8 * s, va_list * va) +{ + tcp_connection_state_t st = va_arg (*va, tcp_connection_state_t); + char * t = 0; + switch (st) + { +#define _(f) case TCP_CONNECTION_STATE_##f: t = #f; break; + foreach_tcp_connection_state +#undef _ + default: break; + } + if (t) + s = format (s, "%s", t); + else + s = format (s, "unknown 0x%x", st); + + return s; +} + +static u8 * format_tcp_ip_4_or_6 (u8 * s, va_list * va) +{ + tcp_ip_4_or_6_t is_ip6 = va_arg (*va, tcp_ip_4_or_6_t); + return format (s, "%s", is_ip6 ? 
"ip6" : "ip4"); +} + +static u8 * format_tcp_mini_connection (u8 * s, va_list * va) +{ + tcp_mini_connection_t * c = va_arg (*va, tcp_mini_connection_t *); + + s = format (s, "state %U, window scale %d, mss %d", + format_tcp_connection_state, c->state, + c->window_scale, c->max_segment_size); + + return s; +} + +static u8 * format_ip4_tcp_mini_connection (u8 * s, va_list * va) +{ + u32 imin = va_arg (*va, u32); + u32 imin_div, imin_mod; + tcp_main_t * tm = &tcp_main; + tcp_mini_connection_t * min; + ip4_tcp_udp_address_x4_and_timestamps_t * mina; + + imin_div = imin / 4; + imin_mod = imin % 4; + + mina = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin_div); + + s = format (s, "%U, age %U", + format_ip4_tcp_udp_address_x4, &mina->address_x4, imin_div, + format_tcp_time_stamp, TCP_TIMER_mini_connection, mina->time_stamps[imin_div]); + + min = vec_elt_at_index (tm->ip4.mini_connections, imin); + + s = format (s, "%U", format_tcp_mini_connection, min); + + return s; +} + +static u8 * format_ip6_tcp_mini_connection (u8 * s, va_list * va) +{ + u32 imin = va_arg (*va, u32); + u32 imin_div, imin_mod; + tcp_main_t * tm = &tcp_main; + tcp_mini_connection_t * min; + ip6_tcp_udp_address_x4_and_timestamps_t * mina; + + imin_div = imin / 4; + imin_mod = imin % 4; + + mina = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin_div); + + s = format (s, "%U, age %U", + format_ip6_tcp_udp_address_x4, &mina->address_x4, imin_div, + format_tcp_time_stamp, TCP_TIMER_mini_connection, mina->time_stamps[imin_div]); + + min = vec_elt_at_index (tm->ip6.mini_connections, imin); + + s = format (s, "%U", format_tcp_mini_connection, min); + + return s; +} + +static u8 * format_tcp_established_connection (u8 * s, va_list * va) +{ + tcp_connection_t * c = va_arg (*va, tcp_connection_t *); + + if (c->flags != 0) + { + s = format (s, ", flags: "); +#define _(f) if (c->flags & TCP_CONNECTION_FLAG_##f) s = format (s, "%s, ", #f); + foreach_tcp_connection_flag; +#undef _ + } + + if (tcp_round_trip_time_stats_is_valid (&c->round_trip_time_stats)) + { + f64 r[2]; + tcp_round_trip_time_stats_compute (&c->round_trip_time_stats, r); + s = format (s, ", rtt %.4e +- %.4e", + r[0], r[1]); + } + + return s; +} + +static u8 * format_ip4_tcp_established_connection (u8 * s, va_list * va) +{ + u32 iest = va_arg (*va, u32); + u32 iest_div, iest_mod; + tcp_main_t * tm = &tcp_main; + tcp_connection_t * est; + ip4_tcp_udp_address_x4_t * esta; + + iest_div = iest / 4; + iest_mod = iest % 4; + + esta = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div); + est = vec_elt_at_index (tm->ip4.established_connections, iest); + + s = format (s, "%U%U", + format_ip4_tcp_udp_address_x4, esta, iest_mod, + format_tcp_established_connection, est); + + return s; +} + +static u8 * format_ip6_tcp_established_connection (u8 * s, va_list * va) +{ + u32 iest = va_arg (*va, u32); + u32 iest_div, iest_mod; + tcp_main_t * tm = &tcp_main; + tcp_connection_t * est; + ip6_tcp_udp_address_x4_t * esta; + + iest_div = iest / 4; + iest_mod = iest % 4; + + esta = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div); + est = vec_elt_at_index (tm->ip6.established_connections, iest); + + s = format (s, "%U%U", + format_ip6_tcp_udp_address_x4, esta, iest_mod, + format_tcp_established_connection, est); + + return s; +} + +VLIB_CLI_COMMAND (vlib_cli_show_tcp_command, static) = { + .path = "show tcp", + .short_help = "Transmission control protocol (TCP) show commands", +}; + +static clib_error_t * 
+show_mini_connections (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) +{ + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46; + tcp_ip_4_or_6_t is_ip6 = TCP_IP4; + tcp_mini_connection_t * min; + ip6_tcp_udp_address_x4_and_timestamps_t * mina6; + ip4_tcp_udp_address_x4_and_timestamps_t * mina4; + clib_error_t * error = 0; + uword i, i0, i1, n_valid; + + if (unformat (input, "4")) + is_ip6 = TCP_IP4; + if (unformat (input, "6")) + is_ip6 = TCP_IP6; + + n_valid = 0; + tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + for (i = 0; i <= tm46->mini_connection_hash_mask; i++) + { + i0 = i / 4; + i1 = i % 4; + + min = vec_elt_at_index (tm46->mini_connections, i); + if (is_ip6) + { + mina6 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, i0); + if (ip6_tcp_udp_address_x4_is_valid (&mina6->address_x4, i1)) + { + vlib_cli_output (vm, "%U", format_ip4_tcp_mini_connection, i); + n_valid += 1; + } + } + else + { + mina4 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, i0); + if (ip4_tcp_udp_address_x4_is_valid (&mina4->address_x4, i1)) + { + vlib_cli_output (vm, "%U", format_ip6_tcp_mini_connection, i); + n_valid += 1; + } + } + } + + if (n_valid == 0) + vlib_cli_output (vm, "no %U mini tcp connections", format_tcp_ip_4_or_6, is_ip6); + + return error; +} + +VLIB_CLI_COMMAND (vlib_cli_show_tcp_mini_connections_command) = { + .path = "show tcp mini-connections", + .short_help = "Show not-yet established TCP connections", + .function = show_mini_connections, +}; + +static clib_error_t * +show_established_connections (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) +{ + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46; + tcp_ip_4_or_6_t is_ip6 = TCP_IP4; + tcp_connection_t * est; + ip6_tcp_udp_address_x4_t * esta6; + ip4_tcp_udp_address_x4_t * esta4; + clib_error_t * error = 0; + uword i, i0, i1, n_valid; + + if (unformat (input, "4")) + is_ip6 = TCP_IP4; + if (unformat (input, "6")) + is_ip6 = TCP_IP6; + + n_valid = 0; + tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + for (i = 0; i < vec_len (tm46->established_connections); i++) + { + i0 = i / 4; + i1 = i % 4; + + est = vec_elt_at_index (tm46->established_connections, i); + if (is_ip6) + { + esta6 = vec_elt_at_index (tm->ip6_established_connection_address_hash, i0); + if (ip6_tcp_udp_address_x4_is_valid (esta6, i1)) + { + vlib_cli_output (vm, "%U", format_ip6_tcp_established_connection, i); + n_valid += 1; + } + } + else + { + esta4 = vec_elt_at_index (tm->ip4_established_connection_address_hash, i0); + if (ip4_tcp_udp_address_x4_is_valid (esta4, i1)) + { + vlib_cli_output (vm, "%U", format_ip4_tcp_established_connection, i); + n_valid += 1; + } + } + } + + if (n_valid == 0) + vlib_cli_output (vm, "no %U established tcp connections", format_tcp_ip_4_or_6, is_ip6); + + return error; +} + +VLIB_CLI_COMMAND (vlib_cli_show_tcp_established_connections_command, static) = { + .path = "show tcp connections", + .short_help = "Show established TCP connections", + .function = show_established_connections, +}; + +#if 0 +uword +tcp_write (vlib_main_t * vm, u32 connection_handle, void * data, uword n_data_bytes) +{ + tcp_main_t * tm = &tcp_main; + tcp_ip_4_or_6_t is_ip6 = tcp_connection_is_ip6 (connection_handle); + ip46_tcp_main_t * tm46 = is_ip6 ? 
&tm->ip6 : &tm->ip4; + tcp_connection_t * c = vec_elt_at_index (tm46->established_connections, connection_handle / 2); + vlib_buffer_t * b; + u32 bi, bi_next, bi_start_of_packet; + ip_csum_t sum; + + b = 0; + bi = c->write_tail_buffer_index; + n_bytes_left_tail = 0; + if (bi != 0) + { + b = vlib_get_buffer (vm, bi); + n_bytes_left_tail = tm->tx_buffer_free_list_n_buffer_bytes - b->current_length; + } + + n_bytes_this_packet = c->write_tail_packet.n_data_bytes; + n_bytes_left_packet = c->max_segment_size - n_bytes_this_packet; + + n_data_left = n_data_bytes; + sum = c->write_tail_packet.data_ip_checksum; + + while (n_data_left > 0) + { + u32 n_copy; + + if (n_bytes_left_tail == 0) + { + if (! vlib_buffer_alloc_from_free_list (vm, &bi_next, 1, + tm->tx_buffer_free_list)) + return n_data_bytes - n_data_left; + + bi_start_of_packet = bi_next; + if (b) + { + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + b->next_buffer = bi_next; + bi_start_of_packet = b->opaque[0]; + } + bi = bi_next; + b = vlib_get_buffer (vm, bi); + + /* Save away start of packet buffer in opaque. */ + b->opaque[0] = bi_start_of_packet; + + c->tail_buffer.buffer_index = bi; + n_bytes_left_tail = tm->tx_buffer_free_list_n_buffer_bytes; + } + + n_copy = n_data_left; + n_copy = clib_min (n_copy, n_bytes_left_tail); + n_copy = clib_min (n_copy, n_bytes_left_packet); + + sum = ip_csum_and_memcpy (sum, b->data + b->current_length, + data, n_copy); + + b->current_length += n_copy; + n_bytes_left_tail -= n_copy; + n_bytes_left_packet -= n_copy; + n_data_left -=- n_copy; + n_bytes_this_packet += n_copy; + + if (n_bytes_left_packet == 0) + { + bi_start_of_packet = b->opaque[0]; + + if (c->tail_packet.buffer_index != 0) + { + vlib_buffer_t * p = vlib_get_buffer (vm, c->tail_packet.buffer_index); + tcp_buffer_t * next = vlib_get_buffer_opaque (p); + next[0] = c->; + } + c->tail_packet.buffer_index = bi_start_of_packet; + } + } + + c->tail_buffer.buffer_index = bi; + c->tail_buffer.n_data_bytes = n_bytes_this_packet; + c->tail_buffer.data_ip_checksum = ip_csum_fold (sum); + + return 0; +} +#endif diff --git a/vnet/vnet/ip/tcp.h b/vnet/vnet/ip/tcp.h new file mode 100644 index 00000000000..98d8e34f0d5 --- /dev/null +++ b/vnet/vnet/ip/tcp.h @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/tcp.h: tcp protocol + * + * Copyright (c) 2011 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_tcp_protocol_h +#define included_tcp_protocol_h + +#include <vppinfra/vector.h> + +/* No support for e.g. Altivec. */ +#if defined (__SSE2__) +#define TCP_HAVE_VEC128 +#endif + +typedef union { + struct { + u16 src, dst; + }; + u32 as_u32; +} tcp_udp_ports_t; + +typedef union { +#ifdef TCP_HAVE_VEC128 + u32x4 as_u32x4; +#endif + tcp_udp_ports_t as_ports[4]; +} tcp_udp_ports_x4_t; + +typedef struct { + union { +#ifdef TCP_HAVE_VEC128 + u32x4 as_u32x4; +#endif + ip4_address_t as_ip4_address[4]; + } src, dst; + tcp_udp_ports_x4_t ports; +} ip4_tcp_udp_address_x4_t; + +typedef struct { + union { +#ifdef TCP_HAVE_VEC128 + u32x4 as_u32x4[4]; +#endif + u32 as_u32[4][4]; + } src, dst; + tcp_udp_ports_x4_t ports; +} ip6_tcp_udp_address_x4_t; + +typedef struct { + u32 his, ours; +} tcp_sequence_pair_t; + +/* Time stamps saved from options. */ +typedef struct { + u32 ours_host_byte_order, his_net_byte_order; +} tcp_time_stamp_pair_t; + +typedef struct { + ip4_tcp_udp_address_x4_t address_x4; + u32 time_stamps[4]; +} ip4_tcp_udp_address_x4_and_timestamps_t; + +typedef struct { + ip6_tcp_udp_address_x4_t address_x4; + u32 time_stamps[4]; +} ip6_tcp_udp_address_x4_and_timestamps_t; + +#define foreach_tcp_connection_state \ + /* unused */ \ + _ (unused) \ + /* Sent SYN-ACK waiting for ACK if he ever feels like sending one. */ \ + _ (listen_ack_wait) \ + /* Sent SYN waiting for ACK or RST. */ \ + _ (connecting) \ + /* Pseudo-type for established connections. */ \ + _ (established) + +typedef enum { +#define _(f) TCP_CONNECTION_STATE_##f, + foreach_tcp_connection_state +#undef _ + TCP_N_CONNECTION_STATE, +} tcp_connection_state_t; + +/* Kept small to fight off syn flood attacks. */ +typedef struct { + tcp_sequence_pair_t sequence_numbers; + + tcp_time_stamp_pair_t time_stamps; + + /* segment size and window scale (saved from options + or set to defaults). */ + u16 max_segment_size; + + u8 window_scale; + + tcp_connection_state_t state : 8; +} tcp_mini_connection_t; + +typedef struct { + /* Sum and sum^2 of measurements. + Used to compute average and RMS. */ + f64 sum, sum2; + + /* Number of measurements. */ + f64 count; +} tcp_round_trip_time_stats_t; + +typedef struct { + u32 first_buffer_index_this_packet; + + u16 data_ip_checksum; + + u16 n_data_bytes; +} tcp_tx_packet_t; + +typedef struct { + tcp_sequence_pair_t sequence_numbers; + + tcp_time_stamp_pair_t time_stamps; + + tcp_tx_packet_t head_packet, tx_tail_packet, write_tail_packet; + + u32 write_tail_buffer_index; + + tcp_round_trip_time_stats_t round_trip_time_stats; + + /* Number of un-acknowledged bytes we've sent. */ + u32 n_tx_unacked_bytes; + + /* segment size and window scale (saved from options + or set to defaults). */ + u16 max_segment_size; + + /* Window from latest received packet. */ + u16 his_window; + + u16 my_window; + + u8 his_window_scale; + + u8 my_window_scale; + + /* ip4/ip6 tos/ttl to use for packets we send. 
*/ + u8 tos, ttl; + + u16 flags; +#define foreach_tcp_connection_flag \ + _ (ack_pending) \ + _ (fin_received) \ + _ (fin_sent) \ + _ (application_requested_close) + + u8 listener_opaque[128 + - 1 * sizeof (tcp_sequence_pair_t) + - 1 * sizeof (tcp_time_stamp_pair_t) + - 3 * sizeof (tcp_tx_packet_t) + - 1 * sizeof (tcp_round_trip_time_stats_t) + - 2 * sizeof (u32) + - 4 * sizeof (u16) + - 4 * sizeof (u8)]; +} tcp_connection_t; + +typedef enum { + TCP_IP4, + TCP_IP6, + TCP_N_IP46, +} tcp_ip_4_or_6_t; + +typedef enum { +#define _(f) LOG2_TCP_CONNECTION_FLAG_##f, + foreach_tcp_connection_flag +#undef _ + N_TCP_CONNECTION_FLAG, +#define _(f) TCP_CONNECTION_FLAG_##f = 1 << LOG2_TCP_CONNECTION_FLAG_##f, + foreach_tcp_connection_flag +#undef _ +} tcp_connection_flag_t; + +typedef enum { + TCP_PACKET_TEMPLATE_SYN, + TCP_PACKET_TEMPLATE_SYN_ACK, + TCP_PACKET_TEMPLATE_ACK, + TCP_PACKET_TEMPLATE_FIN_ACK, + TCP_PACKET_TEMPLATE_RST_ACK, + TCP_N_PACKET_TEMPLATE, +} tcp_packet_template_type_t; + +typedef struct { + vlib_packet_template_t vlib; + + /* TCP checksum of template with zeros for all + variable fields. Network byte order. */ + u16 tcp_checksum_net_byte_order; + + /* IP4 checksum. */ + u16 ip4_checksum_net_byte_order; +} tcp_packet_template_t; + +typedef struct { + u8 log2_n_mini_connection_hash_elts; + u8 log2_n_established_connection_hash_elts; + u8 is_ip6; + + u32 mini_connection_hash_mask; + u32 established_connection_hash_mask; + + uword * established_connection_overflow_hash; + + tcp_mini_connection_t * mini_connections; + + tcp_connection_t * established_connections; + + /* Vector of established connection indices which need ACKs sent. */ + u32 * connections_pending_acks; + + /* Default valid_local_adjacency_bitmap for listeners who want to listen + for a given port in on all interfaces. */ + uword * default_valid_local_adjacency_bitmap; + + u32 output_node_index; + + tcp_packet_template_t packet_templates[TCP_N_PACKET_TEMPLATE]; +} ip46_tcp_main_t; + +#define foreach_tcp_event \ + /* Received a SYN-ACK after sending a SYN to connect. */ \ + _ (connection_established) \ + /* Received a reset (RST) after sending a SYN to connect. */ \ + _ (connect_failed) \ + /* Received a FIN from an established connection. */ \ + _ (fin_received) \ + _ (connection_closed) \ + /* Received a reset RST from an established connection. */ \ + _ (reset_received) + +typedef enum { +#define _(f) TCP_EVENT_##f, + foreach_tcp_event +#undef _ +} tcp_event_type_t; + +typedef void (tcp_event_function_t) + (u32 * connections, + tcp_event_type_t event_type); + +typedef struct { + /* Bitmap indicating which of local (interface) addresses + we should listen on for this destination port. */ + uword * valid_local_adjacency_bitmap; + + /* Destination tcp/udp port to listen for connections. */ + u16 dst_port; + + u16 next_index; + + u32 flags; + + /* Connection indices for which event in event_function applies to. */ + u32 * event_connections[TCP_N_IP46]; + u32 * eof_connections[TCP_N_IP46]; + u32 * close_connections[TCP_N_IP46]; + + tcp_event_function_t * event_function; +} tcp_listener_t; + +typedef struct { + u8 next, error; +} tcp_lookup_disposition_t; + +#define foreach_tcp_timer \ + /* Used to rank mini connections. */ \ + _ (mini_connection, 10e-3) \ + /* Used for timestamps. 
*/ \ + _ (timestamp, 1e-6) + +typedef enum { +#define _(f,s) TCP_TIMER_##f, + foreach_tcp_timer +#undef _ + TCP_N_TIMER, +} tcp_timer_type_t; + +typedef struct { + ip46_tcp_main_t ip4, ip6; + + /* Array of non-established connections, but soon-to be established connections. */ + ip4_tcp_udp_address_x4_and_timestamps_t * ip4_mini_connection_address_hash; + ip6_tcp_udp_address_x4_and_timestamps_t * ip6_mini_connection_address_hash; + + /* Vector of size log2_n_established_connection_hash_elts plus overflow. */ + ip4_tcp_udp_address_x4_t * ip4_established_connection_address_hash; + ip6_tcp_udp_address_x4_t * ip6_established_connection_address_hash; + + /* Jenkins hash seeds for established and mini hash tables. */ + u32x4_union_t connection_hash_seeds[2][3]; + u32x4_union_t connection_hash_masks[2]; + + /* Pool of listeners. */ + tcp_listener_t * listener_pool; + + /* Table mapping destination port to listener index. */ + u16 * listener_index_by_dst_port; + + tcp_lookup_disposition_t disposition_by_state_and_flags[TCP_N_CONNECTION_STATE][64]; + + u8 log2_clocks_per_tick[TCP_N_TIMER]; + + f64 secs_per_tick[TCP_N_TIMER]; + + /* Holds pointers to default and per-packet TCP options while + parsing a TCP packet's options. */ + tcp_mini_connection_t option_decode_mini_connection_template; + + /* Count of currently established connections. */ + u32 n_established_connections[TCP_N_IP46]; + + u32 tx_buffer_free_list; + u32 tx_buffer_free_list_n_buffer_bytes; +} tcp_main_t; + +/* Global TCP main structure. */ +tcp_main_t tcp_main; + +typedef struct { + /* Listen on this port. */ + u16 port; + +#define TCP_LISTENER_IP4 (1 << 0) +#define TCP_LISTENER_IP6 (1 << 1) + u16 flags; + + /* Next node index for data packets. */ + u32 data_node_index; + + /* Event function: called on new connections, etc. */ + tcp_event_function_t * event_function; +} tcp_listener_registration_t; + +uword +tcp_register_listener (vlib_main_t * vm, tcp_listener_registration_t * r); + +always_inline tcp_ip_4_or_6_t +tcp_connection_is_ip6 (u32 h) +{ return h & 1; } + +always_inline tcp_ip_4_or_6_t +tcp_connection_handle_set (u32 iest, tcp_ip_4_or_6_t is_ip6) +{ return is_ip6 + 2*iest; } + +always_inline tcp_connection_t * +tcp_get_connection (u32 connection_handle) +{ + u32 iest = connection_handle / 2; + tcp_ip_4_or_6_t is_ip6 = tcp_connection_is_ip6 (connection_handle); + tcp_main_t * tm = &tcp_main; + ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4; + return vec_elt_at_index (tm46->established_connections, iest); +} + +#endif /* included_tcp_protocol_h */ diff --git a/vnet/vnet/ip/tcp_format.c b/vnet/vnet/ip/tcp_format.c new file mode 100644 index 00000000000..afc3dd20c49 --- /dev/null +++ b/vnet/vnet/ip/tcp_format.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/tcp_format.c: tcp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +static u8 * format_tcp_flags (u8 * s, va_list * args) +{ + int flags = va_arg (*args, int); + +#define _(f) if (flags & TCP_FLAG_##f) s = format (s, "%s, ", #f); + foreach_tcp_flag +#undef _ + + return s; +} + +/* Format TCP header. */ +u8 * format_tcp_header (u8 * s, va_list * args) +{ + tcp_header_t * tcp = va_arg (*args, tcp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + uword indent; + + /* Nothing to do. */ + if (max_header_bytes < sizeof (tcp[0])) + return format (s, "TCP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "TCP: %d -> %d", + clib_net_to_host_u16 (tcp->ports.src), + clib_net_to_host_u16 (tcp->ports.dst)); + + s = format (s, "\n%Useq. tx 0x%08x rx 0x%08x", + format_white_space, indent, + clib_net_to_host_u32 (tcp->seq_number), + clib_net_to_host_u32 (tcp->ack_number)); + + s = format (s, "\n%Uflags %U, tcp header: %d bytes", + format_white_space, indent, + format_tcp_flags, tcp->flags, + (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32)); + + s = format (s, "\n%Uwindow %d, checksum 0x%04x", + format_white_space, indent, + clib_net_to_host_u16 (tcp->window), + clib_net_to_host_u16 (tcp->checksum)); + + header_bytes = tcp_header_bytes (tcp); + + /* Format TCP options. */ +#if 0 + { + u8 * o; + u8 * option_start = (void *) (tcp + 1); + u8 * option_end = (void *) tcp + header_bytes; + + for (o = option_start; o < option_end; ) + { + u32 length = o[1]; + switch (o[0]) + { + case TCP_OPTION_END: + length = 1; + o = option_end; + break; + + case TCP_OPTION_NOP: + length = 1; + break; + + } + } + } +#endif + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t * im = &ip_main; + tcp_udp_port_info_t * pi; + + pi = ip_get_tcp_udp_port_info (im, tcp->ports.dst); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, + pi->format_header, + /* next protocol header */ (void*) tcp + header_bytes, + max_header_bytes - header_bytes); + } + + return s; +} diff --git a/vnet/vnet/ip/tcp_init.c b/vnet/vnet/ip/tcp_init.c new file mode 100644 index 00000000000..3e88d87e11e --- /dev/null +++ b/vnet/vnet/ip/tcp_init.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/tcp_init.c: tcp initialization + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/ip/format.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/tcp_packet.h> + +static clib_error_t * +tcp_init (vlib_main_t * vm) +{ + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi; + clib_error_t * error; + + error = vlib_call_init_function (vm, ip_main_init); + + if (! error) + { + pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP); + pi->format_header = format_tcp_header; + + pi->unformat_pg_edit = unformat_pg_tcp_header; + } + + return 0; +} + +VLIB_INIT_FUNCTION (tcp_init); diff --git a/vnet/vnet/ip/tcp_packet.h b/vnet/vnet/ip/tcp_packet.h new file mode 100644 index 00000000000..ebb111572a0 --- /dev/null +++ b/vnet/vnet/ip/tcp_packet.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip4/tcp_packet.h: TCP packet format (see RFC 793) + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_tcp_packet_h +#define included_tcp_packet_h + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) \ + _ (SYN) \ + _ (RST) \ + _ (PSH) \ + _ (ACK) \ + _ (URG) \ + _ (ECE) \ + _ (CWR) + +enum { +#define _(f) TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ + TCP_N_FLAG_BITS, + +#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ +}; + +typedef struct { + /* Source and destination port. */ + union { + struct { + u16 src, dst; + }; + u32 src_and_dst; + } ports; + + /* Sequence and acknowledgment number. */ + u32 seq_number, ack_number; + + /* Size of TCP header in 32-bit units plus 4 reserved bits. */ + u8 tcp_header_u32s_and_reserved; + + /* see foreach_tcp_flag for enumation of tcp flags. */ + u8 flags; + + /* Current window advertised by sender. + This is the number of bytes sender is willing to receive + right now. */ + u16 window; + + /* Checksum of TCP pseudo header and data. */ + u16 checksum; + + u16 urgent_pointer; +} tcp_header_t; + +always_inline int +tcp_header_bytes (tcp_header_t * t) +{ return (t->tcp_header_u32s_and_reserved >> 4) * sizeof (u32); } + +/* TCP options. */ +typedef enum tcp_option_type { + TCP_OPTION_END = 0, + TCP_OPTION_NOP = 1, + TCP_OPTION_MSS = 2, + TCP_OPTION_WINDOW_SCALE = 3, + TCP_OPTION_SACK_PERMITTED = 4, + TCP_OPTION_SACK_BLOCK = 5, + TCP_OPTION_TIME_STAMP = 8, +} tcp_option_type_t; + +/* All except NOP and END have 1 byte length field. */ +typedef struct { + tcp_option_type_t type : 8; + + /* Length of this option in bytes. */ + u8 length; +} tcp_option_with_length_t; + +#endif /* included_tcp_packet_h */ + diff --git a/vnet/vnet/ip/tcp_pg.c b/vnet/vnet/ip/tcp_pg.c new file mode 100644 index 00000000000..122592d1594 --- /dev/null +++ b/vnet/vnet/ip/tcp_pg.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/tcp_pg: TCP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +static void +tcp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = pg->vlib_main; + u32 ip_offset, tcp_offset; + + tcp_offset = g->start_byte_offset; + ip_offset = (g-1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + tcp_header_t * tcp0; + ip_csum_t sum0; + u32 tcp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ASSERT (p0->current_data == 0); + ip0 = (void *) (p0->data + ip_offset); + tcp0 = (void *) (p0->data + tcp_offset); + tcp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + /* Initialize checksum with header. */ + if (BITS (sum0) == 32) + { + sum0 = clib_mem_unaligned (&ip0->src_address, u32); + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (tcp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. */ + tcp0->checksum = 0; + + sum0 = ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0); + + tcp0->checksum = ~ ip_csum_fold (sum0); + } +} + +typedef struct { + struct { pg_edit_t src, dst; } ports; + pg_edit_t seq_number, ack_number; + pg_edit_t tcp_header_u32s; +#define _(f) pg_edit_t f##_flag; + foreach_tcp_flag +#undef _ + pg_edit_t window; + pg_edit_t checksum; + pg_edit_t urgent_pointer; +} pg_tcp_header_t; + +static inline void +pg_tcp_header_init (pg_tcp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, tcp_header_t, f); + _ (ports.src); + _ (ports.dst); + _ (seq_number); + _ (ack_number); + _ (window); + _ (checksum); + _ (urgent_pointer); +#undef _ + + /* Initialize bit fields. 
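+     Each TCP flag becomes a 1-bit edit at position TCP_FLAG_BIT_<f> of
+     the flags byte; the data-offset nibble is the upper 4 bits of
+     tcp_header_u32s_and_reserved.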
*/ +#define _(f) \ + pg_edit_init_bitfield (&p->f##_flag, tcp_header_t, \ + flags, \ + TCP_FLAG_BIT_##f, 1); + + foreach_tcp_flag +#undef _ + + pg_edit_init_bitfield (&p->tcp_header_u32s, tcp_header_t, + tcp_header_u32s_and_reserved, + 4, 4); +} + +uword +unformat_pg_tcp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_tcp_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t), + &group_index); + pg_tcp_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->seq_number, 0); + pg_edit_set_fixed (&p->ack_number, 0); + + pg_edit_set_fixed (&p->tcp_header_u32s, sizeof (tcp_header_t) / sizeof (u32)); + + pg_edit_set_fixed (&p->window, 4096); + pg_edit_set_fixed (&p->urgent_pointer, 0); + +#define _(f) pg_edit_set_fixed (&p->f##_flag, 0); + foreach_tcp_flag +#undef _ + + p->checksum.type = PG_EDIT_UNSPECIFIED; + + if (! unformat (input, "TCP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->ports.src, + unformat_pg_edit, + unformat_tcp_udp_port, &p->ports.dst)) + goto error; + + /* Parse options. */ + while (1) + { + if (unformat (input, "window %U", + unformat_pg_edit, + unformat_pg_number, &p->window)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + /* Flags. */ +#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1); + foreach_tcp_flag +#undef _ + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t * im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t * pi; + + pi = 0; + if (p->ports.dst.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->ports.dst, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = tcp_pg_edit_function; + g->edit_function_opaque = 0; + } + + return 1; + } + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + diff --git a/vnet/vnet/ip/udp.h b/vnet/vnet/ip/udp.h new file mode 100644 index 00000000000..65eef29cb10 --- /dev/null +++ b/vnet/vnet/ip/udp.h @@ -0,0 +1,113 @@ +/* + * ip/udp.h: udp protocol + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_udp_h +#define included_udp_h + +#include <vnet/vnet.h> +#include <vnet/ip/udp_packet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip4.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/pg/pg.h> +#include <vnet/ip/format.h> + +typedef enum { +#define udp_error(n,s) UDP_ERROR_##n, +#include <vnet/ip/udp_error.def> +#undef udp_error + UDP_N_ERROR, +} udp_error_t; + +#define foreach_udp4_dst_port \ +_ (67, dhcp_to_server) \ +_ (68, dhcp_to_client) \ +_ (500, ikev2) \ +_ (4341, lisp_gpe) \ +_ (4739, ipfix) \ +_ (4789, vxlan) \ +_ (4790, vxlan_gpe) \ +_ (6633, vpath_3) + + +#define foreach_udp6_dst_port \ +_ (547, dhcpv6_to_server) \ +_ (546, dhcpv6_to_client) \ +_ (6633, vpath6_3) + +typedef enum { +#define _(n,f) UDP_DST_PORT_##f = n, + foreach_udp4_dst_port + foreach_udp6_dst_port +#undef _ +} udp_dst_port_t; + +typedef enum { +#define _(n,f) UDP6_DST_PORT_##f = n, + foreach_udp6_dst_port +#undef _ +} udp6_dst_port_t; + +typedef struct { + /* Name (a c string). */ + char * name; + + /* GRE protocol type in host byte order. */ + udp_dst_port_t dst_port; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} udp_dst_port_info_t; + +typedef enum { + UDP_IP6 = 0, + UDP_IP4, /* the code is full of is_ip4... */ + N_UDP_AF, +} udp_af_t; + +typedef struct { + udp_dst_port_info_t * dst_port_infos [N_UDP_AF]; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword * dst_port_info_by_name[N_UDP_AF]; + uword * dst_port_info_by_dst_port[N_UDP_AF]; + + /* convenience */ + vlib_main_t * vlib_main; +} udp_main_t; + +always_inline udp_dst_port_info_t * +udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) +{ + uword * p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); + return p ? vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; +} + +format_function_t format_udp_header; +format_function_t format_udp_rx_trace; + +unformat_function_t unformat_udp_header; + +void udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, + u32 node_index, u8 is_ip4); + +#endif /* included_udp_h */ + diff --git a/vnet/vnet/ip/udp_error.def b/vnet/vnet/ip/udp_error.def new file mode 100644 index 00000000000..46e3bd9ef47 --- /dev/null +++ b/vnet/vnet/ip/udp_error.def @@ -0,0 +1,20 @@ +/* + * udp_error.def: gre errors + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +udp_error (NONE, "no error") +udp_error (NO_LISTENER, "no listener for dst port") +udp_error (LENGTH_ERROR, "UDP packets with length errors") diff --git a/vnet/vnet/ip/udp_format.c b/vnet/vnet/ip/udp_format.c new file mode 100644 index 00000000000..dd54095908c --- /dev/null +++ b/vnet/vnet/ip/udp_format.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_format.c: udp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +/* Format UDP header. */ +u8 * format_udp_header (u8 * s, va_list * args) +{ + udp_header_t * udp = va_arg (*args, udp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + uword indent; + u32 header_bytes = sizeof (udp[0]); + + /* Nothing to do. */ + if (max_header_bytes < sizeof (udp[0])) + return format (s, "UDP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "UDP: %d -> %d", + clib_net_to_host_u16 (udp->src_port), + clib_net_to_host_u16 (udp->dst_port)); + + s = format (s, "\n%Ulength %d, checksum 0x%04x", + format_white_space, indent, + clib_net_to_host_u16 (udp->length), + clib_net_to_host_u16 (udp->checksum)); + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t * im = &ip_main; + tcp_udp_port_info_t * pi; + + pi = ip_get_tcp_udp_port_info (im, udp->dst_port); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, + pi->format_header, + /* next protocol header */ (udp + 1), + max_header_bytes - sizeof (udp[0])); + } + + return s; +} diff --git a/vnet/vnet/ip/udp_init.c b/vnet/vnet/ip/udp_init.c new file mode 100644 index 00000000000..40ca032923c --- /dev/null +++ b/vnet/vnet/ip/udp_init.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_init.c: udp initialization + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +clib_error_t * +udp_init (vlib_main_t * vm) +{ + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi; + clib_error_t * error; + + error = vlib_call_init_function (vm, ip_main_init); + + if (! error) + { + pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); + if (pi == 0) + return clib_error_return (0, "UDP protocol info AWOL"); + pi->format_header = format_udp_header; + pi->unformat_pg_edit = unformat_pg_udp_header; + } + + return 0; +} + +VLIB_INIT_FUNCTION (udp_init); diff --git a/vnet/vnet/ip/udp_local.c b/vnet/vnet/ip/udp_local.c new file mode 100644 index 00000000000..c9355d2a322 --- /dev/null +++ b/vnet/vnet/ip/udp_local.c @@ -0,0 +1,508 @@ +/* + * node.c: udp packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ip/udp.h> +#include <vnet/ip/udp_packet.h> +#include <vppinfra/sparse_vec.h> + +udp_main_t udp_main; + +#define foreach_udp_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") + +typedef enum { +#define _(s,n) UDP_INPUT_NEXT_##s, + foreach_udp_input_next +#undef _ + UDP_INPUT_N_NEXT, +} udp_input_next_t; + +typedef struct { + u16 src_port; + u16 dst_port; +} udp_rx_trace_t; + +u8 * format_udp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp_rx_trace_t * t = va_arg (*args, udp_rx_trace_t *); + + s = format (s, "UDP: src-port %d dst-port %d", + clib_net_to_host_u16(t->src_port), + clib_net_to_host_u16(t->dst_port)); + return s; +} + +typedef struct { + /* Sparse vector mapping udp dst_port in network byte order + to next index. */ + u16 * next_by_dst_port; + + u32 * sparse_index_by_next_index; +} udp_input_runtime_t; + +vlib_node_registration_t udp4_input_node; +vlib_node_registration_t udp6_input_node; + +always_inline uword +udp46_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int is_ip4) +{ + udp_input_runtime_t * rt = is_ip4 ? + (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) + : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); + __attribute__((unused)) u32 n_left_from, next_index, i_next, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + i_next = vec_elt (rt->sparse_index_by_next_index, next_index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + udp_header_t * h0 = 0, * h1 = 0; + u32 i0, i1, dst_port0, dst_port1; + u32 advance0, advance1; + u32 error0, next0, error1, next1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + { + advance0 = sizeof(ip4_header_t); + advance1 = sizeof(ip4_header_t); + } + else + { + advance0 = sizeof(ip6_header_t); + advance1 = sizeof(ip6_header_t); + } + + if (PREDICT_FALSE(b0->current_length < advance0 + sizeof (h0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b0, advance0); + h0 = vlib_buffer_get_current (b0); + error0 = next0 = 0; + } + + if (PREDICT_FALSE(b1->current_length < advance1 + sizeof (h1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b1, advance1); + h1 = vlib_buffer_get_current (b1); + error1 = next1 = 0; + } + + + /* Index sparse array with network byte order. */ + dst_port0 = (error0 == 0) ? h0->dst_port : 0; + dst_port1 = (error1 == 0) ? 
h1->dst_port : 0; + sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, + &i0, &i1); + next0 = (error0 == 0) ? vec_elt(rt->next_by_dst_port, i0) : next0; + next1 = (error1 == 0) ? vec_elt(rt->next_by_dst_port, i1) : next1; + + if (PREDICT_TRUE (error0 == 0)) + b0->error = node->errors[next0 == SPARSE_VEC_INVALID_INDEX ? UDP_ERROR_NO_LISTENER : UDP_ERROR_NONE]; + if (PREDICT_TRUE (error1 == 0)) + b1->error = node->errors[next1 == SPARSE_VEC_INVALID_INDEX ? UDP_ERROR_NO_LISTENER : UDP_ERROR_NONE]; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0->src_port; + tr->dst_port = h0->dst_port; + } + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h1->src_port; + tr->dst_port = h1->dst_port; + } + } + + vlib_buffer_advance (b0, sizeof (*h0)); + vlib_buffer_advance (b1, sizeof (*h1)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * h0 = 0; + u32 i0, next0; + u32 advance0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + advance0 = sizeof(ip4_header_t); + else + advance0 = sizeof(ip6_header_t); + + if (PREDICT_FALSE(b0->current_length < advance0 + sizeof (h0))) + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + goto trace_x1; + } + + vlib_buffer_advance (b0, advance0); + + h0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE + (clib_net_to_host_u16(h0->length) <= b0->current_length)) + { + i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); + next0 = vec_elt(rt->next_by_dst_port, i0); + + b0->error = node->errors [next0 == SPARSE_VEC_INVALID_INDEX ? UDP_ERROR_NO_LISTENER : UDP_ERROR_NONE]; + } + else + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + } + + trace_x1: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0->src_port; + tr->dst_port = h0->dst_port; + } + } + vlib_buffer_advance (b0, sizeof (*h0)); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static char * udp_error_strings[] = { +#define udp_error(n,s) s, +#include "udp_error.def" +#undef udp_error +}; + +static uword +udp4_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */); +} + +static uword +udp6_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */); +} + + +VLIB_REGISTER_NODE (udp4_input_node) = { + .function = udp4_input, + .name = "ip4-udp-lookup", + /* Takes a vector of packets. 
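The node demultiplexes on the UDP
+     destination port using the sparse vector kept in its runtime data;
+     handlers attach at run time via udp_register_dst_port (), e.g.
+     udp_register_dst_port (vm, UDP_DST_PORT_vxlan,
+     my_vxlan_input_node.index, 1) for a hypothetical
+     my_vxlan_input_node handling IPv4 VXLAN.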
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; + +VLIB_REGISTER_NODE (udp6_input_node) = { + .function = udp6_input, + .name = "ip6-udp-lookup", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; + +static void add_dst_port (udp_main_t * um, + udp_dst_port_t dst_port, + char * dst_port_name, u8 is_ip4) +{ + udp_dst_port_info_t * pi; + u32 i; + + vec_add2 (um->dst_port_infos[is_ip4], pi, 1); + i = pi - um->dst_port_infos[is_ip4]; + + pi->name = dst_port_name; + pi->dst_port = dst_port; + pi->next_index = pi->node_index = ~0; + + hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); + + if (pi->name) + hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); +} + +void +udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, + u32 node_index, u8 is_ip4) +{ + udp_main_t * um = &udp_main; + udp_dst_port_info_t * pi; + udp_input_runtime_t * rt; + u16 * n; + u32 i; + + { + clib_error_t * error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + if (! pi) + { + add_dst_port (um, dst_port, 0, is_ip4); + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + ASSERT (pi); + } + + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + is_ip4 ? udp4_input_node.index + : udp6_input_node.index, + node_index); + + /* Setup udp protocol -> next index sparse vector mapping. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index: udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = pi->next_index; + + /* Rebuild next index -> sparse index inverse mapping when sparse vector + is updated. */ + vec_validate (rt->sparse_index_by_next_index, pi->next_index); + for (i = 1; i < vec_len (rt->next_by_dst_port); i++) + rt->sparse_index_by_next_index[rt->next_by_dst_port[i]] = i; +} + +/* Parse a UDP header. */ +uword unformat_udp_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + udp_header_t * udp; + __attribute__((unused)) int old_length; + u16 src_port, dst_port; + + /* Allocate space for IP header. 
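(the rewrite vector grows by
+     sizeof (ip4_header_t) bytes here, though only the leading
+     sizeof (udp_header_t) bytes are zeroed and filled in below).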
*/ + { + void * p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip4_header_t)); + udp = p; + } + + memset (udp, 0, sizeof (udp[0])); + if (unformat (input, "src-port %d dst-port %d", + &src_port, &dst_port)) + { + udp->src_port = clib_host_to_net_u16 (src_port); + udp->dst_port = clib_host_to_net_u16 (dst_port); + return 1; + } + return 0; +} + +static void +udp_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + + n->format_buffer = format_udp_header; + n->unformat_buffer = unformat_udp_header; + pn->unformat_edit = unformat_pg_udp_header; +} + +clib_error_t * udp_local_init (vlib_main_t * vm) +{ + udp_input_runtime_t * rt; + udp_main_t * um = &udp_main; + int i; + + { + clib_error_t * error; + error = vlib_call_init_function (vm, udp_init); + if (error) + clib_error_report (error); + } + + + for (i = 0; i < 2; i++) + { + um->dst_port_info_by_name[i] = hash_create_string (0, sizeof(uword)); + um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof(uword)); + } + + udp_setup_node (vm, udp4_input_node.index); + udp_setup_node (vm, udp6_input_node.index); + + rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + (/* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + vec_validate (rt->sparse_index_by_next_index, UDP_INPUT_NEXT_DROP); + vec_validate (rt->sparse_index_by_next_index, UDP_INPUT_NEXT_PUNT); + rt->sparse_index_by_next_index[UDP_INPUT_NEXT_DROP] + = SPARSE_VEC_INVALID_INDEX; + rt->sparse_index_by_next_index[UDP_INPUT_NEXT_PUNT] + = SPARSE_VEC_INVALID_INDEX; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); + foreach_udp4_dst_port +#undef _ + + rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + (/* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + vec_validate (rt->sparse_index_by_next_index, UDP_INPUT_NEXT_DROP); + vec_validate (rt->sparse_index_by_next_index, UDP_INPUT_NEXT_PUNT); + rt->sparse_index_by_next_index[UDP_INPUT_NEXT_DROP] + = SPARSE_VEC_INVALID_INDEX; + rt->sparse_index_by_next_index[UDP_INPUT_NEXT_PUNT] + = SPARSE_VEC_INVALID_INDEX; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); + foreach_udp6_dst_port +#undef _ + + ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); + /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */ + return 0; +} + +VLIB_INIT_FUNCTION (udp_local_init); diff --git a/vnet/vnet/ip/udp_packet.h b/vnet/vnet/ip/udp_packet.h new file mode 100644 index 00000000000..21c30c6eb71 --- /dev/null +++ b/vnet/vnet/ip/udp_packet.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip4/udp_packet.h: UDP packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_udp_packet_h +#define included_udp_packet_h + +typedef struct { + /* Source and destination port. */ + u16 src_port, dst_port; + + /* Length of UDP header plus payload. */ + u16 length; + + /* Checksum of UDP pseudo-header and data or + zero if checksum is disabled. */ + u16 checksum; +} udp_header_t; + +#endif /* included_udp_packet_h */ + diff --git a/vnet/vnet/ip/udp_pg.c b/vnet/vnet/ip/udp_pg.c new file mode 100644 index 00000000000..a33a56294fb --- /dev/null +++ b/vnet/vnet/ip/udp_pg.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_pg: UDP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vnet/pg/pg.h> +#include <vnet/ip/ip.h> /* for unformat_udp_udp_port */ + +#define UDP_PG_EDIT_LENGTH (1 << 0) +#define UDP_PG_EDIT_CHECKSUM (1 << 1) + +always_inline void +udp_pg_edit_function_inline (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets, + u32 flags) +{ + vlib_main_t * vm = pg->vlib_main; + u32 ip_offset, udp_offset; + + udp_offset = g->start_byte_offset; + ip_offset = (g-1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + udp_header_t * udp0; + u32 udp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ip0 = (void *) (p0->data + ip_offset); + udp0 = (void *) (p0->data + udp_offset); + udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + if (flags & UDP_PG_EDIT_LENGTH) + udp0->length = + clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0) + - ip_offset); + + /* Initialize checksum with header. */ + if (flags & UDP_PG_EDIT_CHECKSUM) + { + ip_csum_t sum0; + + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. */ + udp0->checksum = 0; + + sum0 = ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, sum0); + + sum0 = ~ ip_csum_fold (sum0); + + /* Zero checksum means checksumming disabled. */ + sum0 = sum0 != 0 ? sum0 : 0xffff; + + udp0->checksum = sum0; + } + } +} + +static void +udp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + switch (g->edit_function_opaque) + { + case UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_LENGTH); + break; + + case UDP_PG_EDIT_CHECKSUM: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM); + break; + + case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); + break; + + default: + ASSERT (0); + break; + } +} + +typedef struct { + pg_edit_t src_port, dst_port; + pg_edit_t length; + pg_edit_t checksum; +} pg_udp_header_t; + +static inline void +pg_udp_header_init (pg_udp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, udp_header_t, f); + _ (src_port); + _ (dst_port); + _ (length); + _ (checksum); +#undef _ +} + +uword +unformat_pg_udp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_udp_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), + &group_index); + pg_udp_header_init (p); + + /* Defaults. */ + p->checksum.type = PG_EDIT_UNSPECIFIED; + p->length.type = PG_EDIT_UNSPECIFIED; + + if (! unformat (input, "UDP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src_port, + unformat_pg_edit, + unformat_tcp_udp_port, &p->dst_port)) + goto error; + + /* Parse options. */ + while (1) + { + if (unformat (input, "length %U", + unformat_pg_edit, + unformat_pg_number, &p->length)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + /* Can't parse input: try next protocol level. 
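When no UDP-level keyword
+     matches we stop parsing here; the code below then hands the rest of
+     the input to the next protocol's unformat_pg_edit function (selected
+     by a fixed destination port) or, failing that, to unformat_pg_payload.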
*/ + else + break; + } + + { + ip_main_t * im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t * pi; + + pi = 0; + if (p->dst_port.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + p = pg_get_edit_group (s, group_index); + if (p->checksum.type == PG_EDIT_UNSPECIFIED + || p->length.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = udp_pg_edit_function; + g->edit_function_opaque = 0; + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; + if (p->length.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; + } + + return 1; + } + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + diff --git a/vnet/vnet/ipsec/esp.h b/vnet/vnet/ipsec/esp.h new file mode 100644 index 00000000000..3d46a013b5d --- /dev/null +++ b/vnet/vnet/ipsec/esp.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/devices/dpdk/dpdk.h> + +#include <openssl/hmac.h> +#include <openssl/rand.h> +#include <openssl/evp.h> + +typedef struct { + u32 spi; + u32 seq; + u8 data[0]; +} esp_header_t; + +typedef struct { + u8 pad_length; + u8 next_header; +} esp_footer_t; + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; + esp_header_t esp; +}) ip4_and_esp_header_t; + +typedef CLIB_PACKED (struct { + ip6_header_t ip6; + esp_header_t esp; +}) ip6_and_esp_header_t; + +typedef struct { + const EVP_CIPHER * type; +} esp_crypto_alg_t; + +typedef struct { + const EVP_MD * md; + u8 trunc_size; +} esp_integ_alg_t; + + +typedef struct { + esp_crypto_alg_t * esp_crypto_algs; + esp_integ_alg_t * esp_integ_algs; + EVP_CIPHER_CTX encrypt_ctx; + EVP_CIPHER_CTX decrypt_ctx; + HMAC_CTX hmac_ctx; + ipsec_crypto_alg_t last_encrytp_alg; + ipsec_crypto_alg_t last_decrytp_alg; + ipsec_integ_alg_t last_integ_alg; +} esp_main_t; + +esp_main_t esp_main; + +always_inline void +esp_init() +{ + esp_main_t * em = &esp_main; + + memset (em, 0, sizeof (em[0])); + + vec_validate(em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1); + em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128].type = EVP_aes_128_cbc(); + em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192].type = EVP_aes_192_cbc(); + em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256].type = EVP_aes_256_cbc(); + + vec_validate(em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1); + esp_integ_alg_t * i; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96]; + i->md = EVP_sha1(); + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; + i->md = EVP_sha256(); + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; + i->md = EVP_sha256(); + i->trunc_size = 16; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; + i->md = EVP_sha384(); + i->trunc_size = 24; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; + i->md = EVP_sha512(); + i->trunc_size = 32; + + EVP_CIPHER_CTX_init(&(em->encrypt_ctx)); + EVP_CIPHER_CTX_init(&(em->decrypt_ctx)); + HMAC_CTX_init(&(em->hmac_ctx)); +} + +always_inline unsigned int +hmac_calc(ipsec_integ_alg_t alg, + u8 * key, + int key_len, + u8 * data, + int data_len, + u8 * signature, + u8 use_esn, + u32 seq_hi) +{ + esp_main_t * em = &esp_main; + HMAC_CTX * ctx = &(em->hmac_ctx); + const EVP_MD * md = NULL; + unsigned int len; + + ASSERT(alg < IPSEC_INTEG_N_ALG); + + if (PREDICT_FALSE(em->esp_integ_algs[alg].md == 0)) + return 0; + + if (PREDICT_FALSE(alg != em->last_integ_alg)) { + md = em->esp_integ_algs[alg].md; + em->last_integ_alg = alg; + } + + HMAC_Init(ctx, key, key_len, md); + + HMAC_Update(ctx, data, data_len); + + if (PREDICT_TRUE(use_esn)) + HMAC_Update(ctx, (u8 *) &seq_hi, sizeof(seq_hi)); + HMAC_Final(ctx, signature, &len); + + return em->esp_integ_algs[alg].trunc_size; +} + diff --git a/vnet/vnet/ipsec/esp_decrypt.c b/vnet/vnet/ipsec/esp_decrypt.c new file mode 100644 index 00000000000..ad511b0fba3 --- /dev/null +++ b/vnet/vnet/ipsec/esp_decrypt.c @@ -0,0 +1,424 @@ +/* + * esp_decrypt.c : IPSec ESP decrypt node + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> + +#include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/esp.h> + +#define ESP_WINDOW_SIZE 64 + +#define foreach_esp_decrypt_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") + +#define _(v, s) ESP_DECRYPT_NEXT_##v, +typedef enum { + foreach_esp_decrypt_next +#undef _ + ESP_DECRYPT_N_NEXT, +} esp_decrypt_next_t; + + +#define foreach_esp_decrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(NO_BUFFER, "No buffer (packed dropped)") \ + _(DECRYPTION_FAILED, "ESP decryption failed") \ + _(INTEG_ERROR, "Integrity check failed") \ + _(REPLAY, "SA replayed packet") + + +typedef enum { +#define _(sym,str) ESP_DECRYPT_ERROR_##sym, + foreach_esp_decrypt_error +#undef _ + ESP_DECRYPT_N_ERROR, +} esp_decrypt_error_t; + +static char * esp_decrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_error +#undef _ +}; + +typedef struct { + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_decrypt_trace_t; + +/* packet trace format function */ +static u8 * format_esp_decrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *); + + s = format (s, "esp: crypto %U integrity %U", + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +always_inline void +esp_decrypt_aes_cbc(ipsec_crypto_alg_t alg, + u8 * in, + u8 * out, + size_t in_len, + u8 * key, + u8 * iv) +{ + esp_main_t * em = &esp_main; + EVP_CIPHER_CTX * ctx = &(em->decrypt_ctx); + const EVP_CIPHER * cipher = NULL; + int out_len; + + ASSERT(alg < IPSEC_CRYPTO_N_ALG); + + if (PREDICT_FALSE(em->esp_crypto_algs[alg].type == 0)) + return; + + if (PREDICT_FALSE(alg != em->last_decrytp_alg)) { + cipher = em->esp_crypto_algs[alg].type; + em->last_decrytp_alg = alg; + } + + EVP_DecryptInit_ex(ctx, cipher, NULL, key, iv); + + EVP_DecryptUpdate(ctx, out, &out_len, in, in_len); + EVP_DecryptFinal_ex(ctx, out + out_len, &out_len); +} + +always_inline int +esp_replay_check (ipsec_sa_t * sa, u32 seq) +{ + u32 diff; + + if (PREDICT_TRUE(seq > sa->last_seq)) + return 0; + + diff = sa->last_seq - seq; + + if (ESP_WINDOW_SIZE > diff) + return (sa->replay_window & (1ULL << diff)) ? 1 : 0; + else + return 1; + + return 0; +} + +always_inline int +esp_replay_check_esn (ipsec_sa_t * sa, u32 seq) +{ + u32 tl = sa->last_seq; + u32 th = sa->last_seq_hi; + u32 diff = tl - seq; + + if (PREDICT_TRUE(tl >= (ESP_WINDOW_SIZE - 1))) + { + if (seq >= (tl - ESP_WINDOW_SIZE + 1)) + { + sa->seq_hi = th; + if (seq <= tl) + return (sa->replay_window & (1ULL << diff)) ? 1 : 0; + else + return 0; + } + else + { + sa->seq_hi = th + 1; + return 0; + } + } + else + { + if (seq >= (tl - ESP_WINDOW_SIZE + 1)) + { + sa->seq_hi = th - 1; + return (sa->replay_window & (1ULL << diff)) ? 1 : 0; + } + else + { + sa->seq_hi = th; + if (seq <= tl) + return (sa->replay_window & (1ULL << diff)) ? 
1 : 0; + else + return 0; + } + } + + return 0; +} + +always_inline void +esp_replay_advance (ipsec_sa_t * sa, u32 seq) +{ + u32 pos; + + if (seq > sa->last_seq) + { + pos = seq - sa->last_seq; + if (pos < ESP_WINDOW_SIZE) + sa->replay_window = ((sa->replay_window) << pos) | 1; + else + sa->replay_window = 1; + sa->last_seq = seq; + } + else + { + pos = sa->last_seq - seq; + sa->replay_window |= (1ULL << pos); + } +} + +always_inline void +esp_replay_advance_esn (ipsec_sa_t * sa, u32 seq) +{ + int wrap = sa->seq_hi - sa->last_seq_hi; + u32 pos; + + if (wrap == 0 && seq > sa->last_seq) + { + pos = seq - sa->last_seq; + if (pos < ESP_WINDOW_SIZE) + sa->replay_window = ((sa->replay_window) << pos) | 1; + else + sa->replay_window = 1; + sa->last_seq = seq; + } + else if (wrap > 0) + { + pos = ~seq + sa->last_seq + 1; + if (pos < ESP_WINDOW_SIZE) + sa->replay_window = ((sa->replay_window) << pos) | 1; + else + sa->replay_window = 1; + sa->last_seq = seq; + sa->last_seq_hi = sa->seq_hi; + } + else if (wrap < 0) + { + pos = ~seq + sa->last_seq + 1; + sa->replay_window |= (1ULL << pos); + } + else + { + pos = sa->last_seq - seq; + sa->replay_window |= (1ULL << pos); + } +} + +static uword +esp_decrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, next_index, *to_next; + ipsec_main_t *im = &ipsec_main; + esp_main_t *em = &esp_main; + u32 * recycle = 0; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + ipsec_alloc_empty_buffers(vm, im); + + if (PREDICT_FALSE(vec_len (im->empty_buffers) < n_left_from)){ + vlib_node_increment_counter (vm, esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NO_BUFFER, n_left_from); + goto free_buffers_and_exit; + } + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 i_bi0, o_bi0 = (u32) ~0, next0; + vlib_buffer_t * i_b0; + vlib_buffer_t * o_b0 = 0; + esp_header_t * esp0; + ipsec_sa_t * sa0; + u32 sa_index0 = ~0; + u32 seq; + + i_bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + next0 = ESP_DECRYPT_NEXT_DROP; + + i_b0 = vlib_get_buffer (vm, i_bi0); + esp0 = vlib_buffer_get_current (i_b0); + + sa_index0 = vnet_buffer(i_b0)->output_features.ipsec_sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + seq = clib_host_to_net_u32(esp0->seq); + + /* anti-replay check */ + if (sa0->use_anti_replay) + { + int rv = 0; + + if (PREDICT_TRUE(sa0->use_esn)) + rv = esp_replay_check_esn(sa0, seq); + else + rv = esp_replay_check(sa0, seq); + + if (PREDICT_FALSE(rv)) + { + clib_warning("anti-replay SPI %u seq %u", sa0->spi, seq); + vlib_node_increment_counter (vm, esp_decrypt_node.index, + ESP_DECRYPT_ERROR_REPLAY, 1); + o_bi0 = i_bi0; + goto trace; + } + } + + if (PREDICT_TRUE(sa0->integ_alg != IPSEC_INTEG_ALG_NONE)) + { + u8 sig[64]; + int icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + memset(sig, 0, sizeof(sig)); + u8 * icv = vlib_buffer_get_current (i_b0) + i_b0->current_length - icv_size; + i_b0->current_length -= icv_size; + + hmac_calc(sa0->integ_alg, sa0->integ_key, sa0->integ_key_len, + (u8 *) esp0, i_b0->current_length, sig, sa0->use_esn, + sa0->seq_hi); + + if (PREDICT_FALSE(memcmp(icv, sig, icv_size))) + { + vlib_node_increment_counter (vm, esp_decrypt_node.index, + ESP_DECRYPT_ERROR_INTEG_ERROR, 1); + o_bi0 = i_bi0; + goto trace; + } + } + + if 
(PREDICT_TRUE(sa0->use_anti_replay)) + { + if (PREDICT_TRUE(sa0->use_esn)) + esp_replay_advance_esn(sa0, seq); + else + esp_replay_advance(sa0, seq); + } + + /* grab free buffer */ + uword last_empty_buffer = vec_len (im->empty_buffers) - 1; + o_bi0 = im->empty_buffers[last_empty_buffer]; + o_b0 = vlib_get_buffer (vm, o_bi0); + vlib_prefetch_buffer_with_index (vm, im->empty_buffers[last_empty_buffer-1], STORE); + _vec_len (im->empty_buffers) = last_empty_buffer; + + /* add old buffer to the recycle list */ + vec_add1(recycle, i_bi0); + + if (sa0->crypto_alg >= IPSEC_CRYPTO_ALG_AES_CBC_128 && + sa0->crypto_alg <= IPSEC_CRYPTO_ALG_AES_CBC_256) { + const int BLOCK_SIZE = 16; + const int IV_SIZE = 16; + esp_footer_t * f0; + + int blocks = (i_b0->current_length - sizeof (esp_header_t) - IV_SIZE) / BLOCK_SIZE; + + o_b0->current_data = sizeof(ethernet_header_t); + + esp_decrypt_aes_cbc(sa0->crypto_alg, + esp0->data + IV_SIZE, + (u8 *) vlib_buffer_get_current (o_b0), + BLOCK_SIZE * blocks, + sa0->crypto_key, + esp0->data); + + o_b0->current_length = (blocks * 16) - 2; + o_b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (o_b0) + o_b0->current_length); + o_b0->current_length -= f0->pad_length; + if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP)) + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + else if (f0->next_header == IP_PROTOCOL_IPV6) + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, esp_decrypt_node.index, + ESP_DECRYPT_ERROR_DECRYPTION_FAILED, + 1); + o_b0 = 0; + goto trace; + } + + to_next[0] = o_bi0; + to_next += 1; + + vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = (u32)~0; + } + +trace: + if (PREDICT_FALSE(i_b0->flags & VLIB_BUFFER_IS_TRACED)) { + if (o_b0) { + o_b0->flags |= VLIB_BUFFER_IS_TRACED; + o_b0->trace_index = i_b0->trace_index; + } + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, o_b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, o_bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, esp_decrypt_node.index, + ESP_DECRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + +free_buffers_and_exit: + vlib_buffer_free (vm, recycle, vec_len(recycle)); + vec_free(recycle); + return from_frame->n_vectors; +} + + +VLIB_REGISTER_NODE (esp_decrypt_node) = { + .function = esp_decrypt_node_fn, + .name = "esp-decrypt", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_error_strings), + .error_strings = esp_decrypt_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + diff --git a/vnet/vnet/ipsec/esp_encrypt.c b/vnet/vnet/ipsec/esp_encrypt.c new file mode 100644 index 00000000000..68add4c3d57 --- /dev/null +++ b/vnet/vnet/ipsec/esp_encrypt.c @@ -0,0 +1,386 @@ +/* + * esp_encrypt.c : IPSec ESP encrypt node + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> + +#include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/esp.h> + +#define ESP_SEQ_MAX (4294967295UL) + +#define foreach_esp_encrypt_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") \ +_(INTERFACE_OUTPUT, "interface-output") + +#define _(v, s) ESP_ENCRYPT_NEXT_##v, +typedef enum { + foreach_esp_encrypt_next +#undef _ + ESP_ENCRYPT_N_NEXT, +} esp_encrypt_next_t; + +#define foreach_esp_encrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(NO_BUFFER, "No buffer (packet dropped)") \ + _(DECRYPTION_FAILED, "ESP encryption failed") \ + _(SEQ_CYCLED, "sequence number cycled") + + +typedef enum { +#define _(sym,str) ESP_ENCRYPT_ERROR_##sym, + foreach_esp_encrypt_error +#undef _ + ESP_ENCRYPT_N_ERROR, +} esp_encrypt_error_t; + +static char * esp_encrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_error +#undef _ +}; + +vlib_node_registration_t esp_encrypt_node; + +typedef struct { + u32 spi; + u32 seq; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_encrypt_trace_t; + +/* packet trace format function */ +static u8 * format_esp_encrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_encrypt_trace_t * t = va_arg (*args, esp_encrypt_trace_t *); + + s = format (s, "esp: spi %u seq %u crypto %U integrity %U", + t->spi, t->seq, + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +always_inline void +esp_encrypt_aes_cbc(ipsec_crypto_alg_t alg, + u8 * in, + u8 * out, + size_t in_len, + u8 * key, + u8 * iv) +{ + esp_main_t * em = &esp_main; + EVP_CIPHER_CTX * ctx = &(em->encrypt_ctx); + const EVP_CIPHER * cipher = NULL; + int out_len; + + ASSERT(alg < IPSEC_CRYPTO_N_ALG); + + if (PREDICT_FALSE(em->esp_crypto_algs[alg].type == IPSEC_CRYPTO_ALG_NONE)) + return; + + if (PREDICT_FALSE(alg != em->last_encrytp_alg)) { + cipher = em->esp_crypto_algs[alg].type; + em->last_encrytp_alg = alg; + } + + EVP_EncryptInit_ex(ctx, cipher, NULL, key, iv); + + EVP_EncryptUpdate(ctx, out, &out_len, in, in_len); + EVP_EncryptFinal_ex(ctx, out + out_len, &out_len); +} + +always_inline int +esp_seq_advance (ipsec_sa_t * sa) +{ + if (PREDICT_TRUE(sa->use_esn)) + { + if (PREDICT_FALSE(sa->seq == ESP_SEQ_MAX)) + { + if (PREDICT_FALSE(sa->use_anti_replay && sa->seq_hi == ESP_SEQ_MAX)) + return 1; + sa->seq_hi++; + } + sa->seq++; + } + else + { + if (PREDICT_FALSE(sa->use_anti_replay && sa->seq == ESP_SEQ_MAX)) + return 1; + sa->seq++; + } + + return 0; +} + +static uword +esp_encrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, * to_next = 0, next_index; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + ipsec_main_t *im = &ipsec_main; + u32 * recycle = 0; + + ipsec_alloc_empty_buffers(vm, im); + + if (PREDICT_FALSE(vec_len (im->empty_buffers) < n_left_from)){ + 
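/* too few pre-allocated empty buffers to encrypt this frame:
+	 count every packet as ESP_ENCRYPT_ERROR_NO_BUFFER and discard the frame */
+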
vlib_node_increment_counter (vm, esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_NO_BUFFER, n_left_from); + clib_warning("no enough empty buffers. discarding frame"); + goto free_buffers_and_exit; + } + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 i_bi0, o_bi0, next0; + vlib_buffer_t * i_b0, *o_b0 = 0; + u32 sa_index0; + ipsec_sa_t * sa0; + ip4_and_esp_header_t * ih0, * oh0 = 0; + ip6_and_esp_header_t * ih6_0, * oh6_0 = 0; + uword last_empty_buffer; + esp_header_t * o_esp0; + esp_footer_t *f0; + u8 is_ipv6; + u8 ip_hdr_size; + u8 next_hdr_type; + + i_bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + next0 = ESP_ENCRYPT_NEXT_DROP; + + i_b0 = vlib_get_buffer (vm, i_bi0); + sa_index0 = vnet_buffer(i_b0)->output_features.ipsec_sad_index; + sa0 = pool_elt_at_index(im->sad, sa_index0); + + if (PREDICT_FALSE(esp_seq_advance(sa0))) + { + clib_warning("sequence number counter has cycled SPI %u", sa0->spi); + vlib_node_increment_counter (vm, esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1); + //TODO: rekey SA + o_bi0 = i_bi0; + goto trace; + } + + /* grab free buffer */ + last_empty_buffer = vec_len (im->empty_buffers) - 1; + o_bi0 = im->empty_buffers[last_empty_buffer]; + o_b0 = vlib_get_buffer (vm, o_bi0); + o_b0->current_data = sizeof(ethernet_header_t); + ih0 = vlib_buffer_get_current (i_b0); + vlib_prefetch_buffer_with_index (vm, im->empty_buffers[last_empty_buffer-1], STORE); + _vec_len (im->empty_buffers) = last_empty_buffer; + to_next[0] = o_bi0; + to_next += 1; + + /* add old buffer to the recycle list */ + vec_add1(recycle, i_bi0); + + /* is ipv6 */ + if (PREDICT_FALSE((ih0->ip4.ip_version_and_header_length & 0xF0 ) == 0x60)) + { + is_ipv6 = 1; + ih6_0 = vlib_buffer_get_current (i_b0); + ip_hdr_size = sizeof(ip6_header_t); + next_hdr_type = IP_PROTOCOL_IPV6; + oh6_0 = vlib_buffer_get_current (o_b0); + o_esp0 = vlib_buffer_get_current (o_b0) + sizeof(ip6_header_t); + + oh6_0->ip6.ip_version_traffic_class_and_flow_label = + ih6_0->ip6.ip_version_traffic_class_and_flow_label; + oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP; + oh6_0->ip6.hop_limit = 254; + oh6_0->esp.spi = clib_net_to_host_u32(sa0->spi); + oh6_0->esp.seq = clib_net_to_host_u32(sa0->seq); + } + else + { + is_ipv6 = 0; + ip_hdr_size = sizeof(ip4_header_t); + next_hdr_type = IP_PROTOCOL_IP_IN_IP; + oh0 = vlib_buffer_get_current (o_b0); + o_esp0 = vlib_buffer_get_current (o_b0) + sizeof(ip4_header_t); + + oh0->ip4.ip_version_and_header_length = 0x45; + oh0->ip4.tos = ih0->ip4.tos; + oh0->ip4.fragment_id = 0; + oh0->ip4.flags_and_fragment_offset = 0; + oh0->ip4.ttl = 254; + oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP; + oh0->esp.spi = clib_net_to_host_u32(sa0->spi); + oh0->esp.seq = clib_net_to_host_u32(sa0->seq); + } + + if (PREDICT_TRUE(sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; + oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; + + /* in tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP4_INPUT; + vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = (u32)~0; + } + else if(sa0->is_tunnel && sa0->is_tunnel_ip6) + { + oh6_0->ip6.src_address.as_u64[0] = sa0->tunnel_src_addr.ip6.as_u64[0]; + oh6_0->ip6.src_address.as_u64[1] = sa0->tunnel_src_addr.ip6.as_u64[1]; + oh6_0->ip6.dst_address.as_u64[0] = sa0->tunnel_dst_addr.ip6.as_u64[0]; + 
oh6_0->ip6.dst_address.as_u64[1] = sa0->tunnel_dst_addr.ip6.as_u64[1]; + + /* in tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP6_INPUT; + vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = (u32)~0; + } + else + { + next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; + vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = + vnet_buffer (i_b0)->sw_if_index[VLIB_TX]; + } + + ASSERT(sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); + + if (PREDICT_TRUE(sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE)) { + + const int BLOCK_SIZE = 16; + const int IV_SIZE = 16; + int blocks = 1 + (i_b0->current_length + 1) / BLOCK_SIZE; + + /* pad packet in input buffer */ + u8 pad_bytes = BLOCK_SIZE * blocks - 2 - i_b0->current_length; + u8 i; + u8 * padding = vlib_buffer_get_current (i_b0) + i_b0->current_length; + i_b0->current_length = BLOCK_SIZE * blocks; + for (i = 0; i < pad_bytes; ++i) + { + padding[i] = i + 1; + } + f0 = vlib_buffer_get_current (i_b0) + i_b0->current_length - 2; + f0->pad_length = pad_bytes; + f0->next_header = next_hdr_type; + + o_b0->current_length = ip_hdr_size + sizeof(esp_header_t) + + BLOCK_SIZE * blocks + IV_SIZE; + + vnet_buffer (o_b0)->sw_if_index[VLIB_RX] = + vnet_buffer (i_b0)->sw_if_index[VLIB_RX]; + o_b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + + u8 iv[16]; + RAND_bytes(iv, sizeof(iv)); + + memcpy((u8 *) vlib_buffer_get_current (o_b0) + ip_hdr_size + + sizeof(esp_header_t), iv, 16 ); + + esp_encrypt_aes_cbc(sa0->crypto_alg, + (u8 *) vlib_buffer_get_current (i_b0), + (u8 *) vlib_buffer_get_current (o_b0) + + ip_hdr_size + sizeof(esp_header_t) + IV_SIZE, + BLOCK_SIZE * blocks, + sa0->crypto_key, + iv); + } + + o_b0->current_length += hmac_calc(sa0->integ_alg, sa0->integ_key, + sa0->integ_key_len, + (u8 *) o_esp0, + o_b0->current_length - ip_hdr_size, + vlib_buffer_get_current (o_b0) + + o_b0->current_length, + sa0->use_esn, + sa0->seq_hi); + + + if (PREDICT_FALSE(is_ipv6)) + { + oh6_0->ip6.payload_length = clib_host_to_net_u16 ( + vlib_buffer_length_in_chain (vm, o_b0) - sizeof(ip6_header_t)); + } + else + { + oh0->ip4.length = clib_host_to_net_u16 ( + vlib_buffer_length_in_chain (vm, o_b0)); + oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4); + } + +trace: + if (PREDICT_FALSE(i_b0->flags & VLIB_BUFFER_IS_TRACED)) { + if (o_b0) { + o_b0->flags |= VLIB_BUFFER_IS_TRACED; + o_b0->trace_index = i_b0->trace_index; + } + esp_encrypt_trace_t *tr = vlib_add_trace (vm, node, o_b0, sizeof (*tr)); + tr->spi = sa0->spi; + tr->seq = sa0->seq - 1; + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, o_bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + +free_buffers_and_exit: + vlib_buffer_free (vm, recycle, vec_len(recycle)); + vec_free(recycle); + return from_frame->n_vectors; +} + + +VLIB_REGISTER_NODE (esp_encrypt_node) = { + .function = esp_encrypt_node_fn, + .name = "esp-encrypt", + .vector_size = sizeof (u32), + .format_trace = format_esp_encrypt_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_encrypt_error_strings), + .error_strings = esp_encrypt_error_strings, + + .n_next_nodes = ESP_ENCRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n, + foreach_esp_encrypt_next +#undef _ + }, +}; + diff --git a/vnet/vnet/ipsec/ikev2.c b/vnet/vnet/ipsec/ikev2.c new file mode 100644 index 00000000000..ab2277f5f90 --- 
/dev/null +++ b/vnet/vnet/ipsec/ikev2.c @@ -0,0 +1,2142 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/udp.h> +#include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/ikev2.h> +#include <vnet/ipsec/ikev2_priv.h> + +static int ikev2_delete_tunnel_interface(vnet_main_t * vnm, + ikev2_sa_t *sa, + ikev2_child_sa_t * child); + +static void hexdump(u8 buffer[], int len) +{ +#define HEXDUMP_LINE_LEN 16 + int i; + char s[HEXDUMP_LINE_LEN+1]; + bzero(s, HEXDUMP_LINE_LEN+1); + + for(i=0; i < len; i++) { + if (!(i%HEXDUMP_LINE_LEN)) { + if (s[0]) + printf("[%s]",s); + printf("\n%05x: ", i); + bzero(s, HEXDUMP_LINE_LEN); + } + s[i%HEXDUMP_LINE_LEN]=isprint(buffer[i])?buffer[i]:'.'; + printf("%02x ", buffer[i]); + } + while(i++%HEXDUMP_LINE_LEN) + printf(" "); + + printf("[%s]\n", s); +} + +#define ikev2_set_state(sa, v) do { \ + (sa)->state = v; \ + clib_warning("sa state changed to " #v); \ + } while(0); + +typedef struct { + u32 next_index; + u32 sw_if_index; +} ikev2_trace_t; + +static u8 * format_ikev2_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ikev2_trace_t * t = va_arg (*args, ikev2_trace_t *); + + s = format (s, "ikev2: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + return s; +} + +vlib_node_registration_t ikev2_node; + +#define foreach_ikev2_error \ +_(PROCESSED, "IKEv2 packets processed") \ +_(IKE_SA_INIT_RETRANSMIT, "IKE_SA_INIT retransmit ") \ +_(IKE_SA_INIT_IGNORE, "IKE_SA_INIT ignore (IKE SA already auth)") \ +_(IKE_REQ_RETRANSMIT, "IKE request retransmit") \ +_(IKE_REQ_IGNORE, "IKE request ignore (old msgid)") \ +_(NOT_IKEV2, "Non IKEv2 packets received") + +typedef enum { +#define _(sym,str) IKEV2_ERROR_##sym, + foreach_ikev2_error +#undef _ + IKEV2_N_ERROR, +} ikev2_error_t; + +static char * ikev2_error_strings[] = { +#define _(sym,string) string, + foreach_ikev2_error +#undef _ +}; + +typedef enum { + IKEV2_NEXT_IP4_LOOKUP, + IKEV2_NEXT_ERROR_DROP, + IKEV2_N_NEXT, +} ikev2_next_t; + +static ikev2_sa_transform_t * +ikev2_find_transform_data(ikev2_sa_transform_t * t) +{ + ikev2_main_t * km = &ikev2_main; + ikev2_sa_transform_t * td; + + vec_foreach(td, km->supported_transforms) + { + if (td->type != t->type) + continue; + + if (td->transform_id != t->transform_id) + continue; + + if (td->type == IKEV2_TRANSFORM_TYPE_ENCR) + { + if (vec_len(t->attrs) != 4 || t->attrs[0] != 0x80 || t->attrs[1] != 14) + continue; + + if (((t->attrs[2] << 8 | t->attrs[3]) / 8) != td->key_len) + continue; + } + return td; + } + return 0; +} + +static ikev2_sa_proposal_t * +ikev2_select_proposal(ikev2_sa_proposal_t *proposals, ikev2_protocol_id_t prot_id) +{ + ikev2_sa_proposal_t * rv = 0; + ikev2_sa_proposal_t * proposal; + ikev2_sa_transform_t * transform, * new_t; + u8 
mandatory_bitmap, optional_bitmap; + + if (prot_id == IKEV2_PROTOCOL_IKE) + { + mandatory_bitmap = (1 << IKEV2_TRANSFORM_TYPE_ENCR) | + (1 << IKEV2_TRANSFORM_TYPE_PRF) | + (1 << IKEV2_TRANSFORM_TYPE_INTEG) | + (1 << IKEV2_TRANSFORM_TYPE_DH); + optional_bitmap = mandatory_bitmap; + } + else if (prot_id == IKEV2_PROTOCOL_ESP) + { + mandatory_bitmap = (1 << IKEV2_TRANSFORM_TYPE_ENCR) | + (1 << IKEV2_TRANSFORM_TYPE_ESN); + optional_bitmap = mandatory_bitmap | + (1 << IKEV2_TRANSFORM_TYPE_INTEG) | + (1 << IKEV2_TRANSFORM_TYPE_DH); + } + else if (prot_id == IKEV2_PROTOCOL_AH) + { + mandatory_bitmap = (1 << IKEV2_TRANSFORM_TYPE_INTEG) | + (1 << IKEV2_TRANSFORM_TYPE_ESN); + optional_bitmap = mandatory_bitmap | + (1 << IKEV2_TRANSFORM_TYPE_DH); + } + else + return 0; + + vec_add2(rv, proposal, 1); + + vec_foreach(proposal, proposals) + { + u8 bitmap = 0; + if (proposal->protocol_id != prot_id) + continue; + + vec_foreach(transform, proposal->transforms) + { + if ((1 << transform->type) & bitmap) + continue; + + if (ikev2_find_transform_data(transform)) + { + bitmap |= 1 << transform->type; + vec_add2(rv->transforms, new_t, 1); + memcpy(new_t, transform, sizeof(*new_t)); + new_t->attrs = vec_dup(transform->attrs); + } + } + + clib_warning("bitmap is %x mandatory is %x optional is %x", + bitmap, mandatory_bitmap, optional_bitmap); + + if ((bitmap & mandatory_bitmap) == mandatory_bitmap && + (bitmap & ~optional_bitmap) == 0) + { + rv->proposal_num = proposal->proposal_num; + rv->protocol_id = proposal->protocol_id; + RAND_bytes((u8 *) &rv->spi, sizeof(rv->spi)); + goto done; + } + else + { + vec_free(rv->transforms); + } + } + + vec_free(rv); +done: + return rv; +} + +ikev2_sa_transform_t * +ikev2_sa_get_td_for_type(ikev2_sa_proposal_t * p, ikev2_transform_type_t type) +{ + ikev2_sa_transform_t * t; + + if (!p) + return 0; + + vec_foreach(t, p->transforms) + { + if (t->type == type) + return ikev2_find_transform_data(t); + } + return 0; +} + +ikev2_child_sa_t * +ikev2_sa_get_child(ikev2_sa_t * sa, u32 spi, ikev2_protocol_id_t prot_id) +{ + ikev2_child_sa_t * c; + vec_foreach(c, sa->childs) + { + if (c->i_proposals[0].spi == spi && c->i_proposals[0].protocol_id == prot_id) + return c; + } + + return 0; +} + +void +ikev2_sa_free_proposal_vector(ikev2_sa_proposal_t ** v) +{ + ikev2_sa_proposal_t * p; + ikev2_sa_transform_t * t; + + if (!*v) + return; + + vec_foreach(p, *v) { + vec_foreach(t, p->transforms) { + vec_free(t->attrs); + } + vec_free(p->transforms); + } + vec_free(*v); +}; + +static void +ikev2_sa_free_all_child_sa(ikev2_child_sa_t ** childs) +{ + ikev2_child_sa_t * c; + vec_foreach(c, *childs) + { + ikev2_sa_free_proposal_vector(&c->r_proposals); + ikev2_sa_free_proposal_vector(&c->i_proposals); + vec_free(c->sk_ai); + vec_free(c->sk_ar); + vec_free(c->sk_ei); + vec_free(c->sk_er); + } + + vec_free(*childs); +} + +static void +ikev2_sa_del_child_sa(ikev2_sa_t * sa, ikev2_child_sa_t * child) +{ + ikev2_sa_free_proposal_vector(&child->r_proposals); + ikev2_sa_free_proposal_vector(&child->i_proposals); + vec_free(child->sk_ai); + vec_free(child->sk_ar); + vec_free(child->sk_ei); + vec_free(child->sk_er); + + vec_del1(sa->childs, child - sa->childs); +} + +static void +ikev2_sa_free_all_vec(ikev2_sa_t *sa) +{ + vec_free(sa->i_nonce); + vec_free(sa->i_dh_data); + vec_free(sa->dh_shared_key); + + ikev2_sa_free_proposal_vector(&sa->r_proposals); + ikev2_sa_free_proposal_vector(&sa->i_proposals); + + vec_free(sa->sk_d); + vec_free(sa->sk_ai); + vec_free(sa->sk_ar); + vec_free(sa->sk_ei); + 
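+  /* SK_er is the responder-to-initiator encryption key; SK_pi/SK_pr are the
+   * prf keys used when the AUTH payloads are built (RFC 7296, section 2.14). */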
vec_free(sa->sk_er); + vec_free(sa->sk_pi); + vec_free(sa->sk_pr); + + vec_free(sa->i_id.data); + vec_free(sa->i_auth.data); + vec_free(sa->r_id.data); + vec_free(sa->r_auth.data); + if (sa->r_auth.key) + EVP_PKEY_free(sa->r_auth.key); + + vec_free(sa->del); + + ikev2_sa_free_all_child_sa(&sa->childs); +} + +static void +ikev2_delete_sa(ikev2_sa_t *sa) +{ + ikev2_main_t * km = &ikev2_main; + uword * p; + + ikev2_sa_free_all_vec(sa); + + p = hash_get(km->sa_by_rspi, sa->rspi); + if (p) + { + hash_unset(km->sa_by_rspi, sa->rspi); + pool_put(km->sas, sa); + } +} + +static void +ikev2_generate_sa_init_data(ikev2_sa_t *sa) +{ + ikev2_sa_transform_t * t = 0, * t2; + ikev2_main_t * km = &ikev2_main; + + if (sa->dh_group == IKEV2_TRANSFORM_DH_TYPE_NONE) + { + return; + } + + /* check if received DH group is on our list of supported groups */ + vec_foreach(t2, km->supported_transforms) + { + if (t2->type == IKEV2_TRANSFORM_TYPE_DH && + sa->dh_group == t2->dh_type) + { + t = t2; + break; + } + } + + if (!t) + { + clib_warning("unknown dh data group %u (data len %u)", sa->dh_group, + vec_len(sa->i_dh_data)); + sa->dh_group = IKEV2_TRANSFORM_DH_TYPE_NONE; + return; + } + + /* generate rspi */ + RAND_bytes((u8 *) &sa->rspi, 8); + + /* generate nonce */ + sa->r_nonce = vec_new(u8, IKEV2_NONCE_SIZE); + RAND_bytes((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE); + + /* generate dh keys */ + ikev2_generate_dh(sa, t); +} + +static void +ikev2_calc_keys(ikev2_sa_t *sa) +{ + u8 * tmp; + /* calculate SKEYSEED = prf(Ni | Nr, g^ir) */ + u8 * skeyseed = 0; + u8 * s = 0; + ikev2_sa_transform_t * tr_encr, * tr_prf, * tr_integ; + tr_encr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + tr_prf = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF); + tr_integ = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG); + + vec_append(s, sa->i_nonce); + vec_append(s, sa->r_nonce); + skeyseed = ikev2_calc_prf(tr_prf, s, sa->dh_shared_key); + + /* Calculate S = Ni | Nr | SPIi | SPIr*/ + u64 * spi; + vec_add2(s, tmp, 2 * sizeof(*spi)); + spi = (u64 *) tmp; + spi[0] = clib_host_to_net_u64(sa->ispi); + spi[1] = clib_host_to_net_u64(sa->rspi); + + /* calculate PRFplus */ + u8 * keymat; + int len = tr_prf->key_trunc + /* SK_d */ + tr_integ->key_len * 2 + /* SK_ai, SK_ar */ + tr_encr->key_len * 2 + /* SK_ei, SK_er */ + tr_prf->key_len * 2 ; /* SK_pi, SK_pr */ + + keymat = ikev2_calc_prfplus(tr_prf, skeyseed, s, len); + vec_free(skeyseed); + vec_free(s); + + int pos = 0; + + /* SK_d */ + sa->sk_d = vec_new(u8, tr_prf->key_trunc); + memcpy(sa->sk_d, keymat + pos, tr_prf->key_trunc); + pos += tr_prf->key_trunc; + + /* SK_ai */ + sa->sk_ai = vec_new(u8, tr_integ->key_len); + memcpy(sa->sk_ai, keymat + pos, tr_integ->key_len); + pos += tr_integ->key_len; + + /* SK_ar */ + sa->sk_ar = vec_new(u8, tr_integ->key_len); + memcpy(sa->sk_ar, keymat + pos, tr_integ->key_len); + pos += tr_integ->key_len; + + /* SK_ei */ + sa->sk_ei = vec_new(u8, tr_encr->key_len); + memcpy(sa->sk_ei, keymat + pos, tr_encr->key_len); + pos += tr_encr->key_len; + + /* SK_er */ + sa->sk_er = vec_new(u8, tr_encr->key_len); + memcpy(sa->sk_er, keymat + pos, tr_encr->key_len); + pos += tr_encr->key_len; + + /* SK_pi */ + sa->sk_pi = vec_new(u8, tr_prf->key_len); + memcpy(sa->sk_pi, keymat + pos, tr_prf->key_len); + pos += tr_prf->key_len; + + /* SK_pr */ + sa->sk_pr = vec_new(u8, tr_prf->key_len); + memcpy(sa->sk_pr, keymat + pos, tr_prf->key_len); + pos += tr_prf->key_len; + + vec_free(keymat); +} + +static void 
+ikev2_calc_child_keys(ikev2_sa_t *sa, ikev2_child_sa_t * child) +{ + u8 * s = 0; + ikev2_sa_transform_t * tr_prf, * ctr_encr, * ctr_integ; + tr_prf = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF); + ctr_encr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + ctr_integ = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG); + + vec_append(s, sa->i_nonce); + vec_append(s, sa->r_nonce); + /* calculate PRFplus */ + u8 * keymat; + int len = ctr_encr->key_len * 2 + ctr_integ->key_len * 2; + + keymat = ikev2_calc_prfplus(tr_prf, sa->sk_d, s, len); + hexdump(keymat, vec_len(keymat)); + + int pos = 0; + + /* SK_ei */ + child->sk_ei = vec_new(u8, ctr_encr->key_len); + memcpy(child->sk_ei, keymat + pos, ctr_encr->key_len); + pos += ctr_encr->key_len; + + /* SK_ai */ + child->sk_ai = vec_new(u8, ctr_integ->key_len); + memcpy(child->sk_ai, keymat + pos, ctr_integ->key_len); + pos += ctr_integ->key_len; + + /* SK_er */ + child->sk_er = vec_new(u8, ctr_encr->key_len); + memcpy(child->sk_er, keymat + pos, ctr_encr->key_len); + pos += ctr_encr->key_len; + + /* SK_ar */ + child->sk_ar = vec_new(u8, ctr_integ->key_len); + memcpy(child->sk_ar, keymat + pos, ctr_integ->key_len); + pos += ctr_integ->key_len; + + ASSERT(pos == len); + + vec_free(keymat); +} + +static void +ikev2_process_sa_init_req(vlib_main_t * vm, ikev2_sa_t *sa, ike_header_t * ike) +{ + int p = 0; + u32 len = clib_net_to_host_u32(ike->length); + u8 payload = ike->nextpayload; + + clib_warning("ispi %lx rspi %lx nextpayload %x version %x " + "exchange %x flags %x msgid %x length %u", + clib_net_to_host_u64(ike->ispi), + clib_net_to_host_u64(ike->rspi), + payload, ike->version, + ike->exchange, ike->flags, + clib_net_to_host_u32(ike->msgid), + len); + + sa->ispi = clib_net_to_host_u64(ike->ispi); + + /* store whole IKE payload - needed for PSK auth */ + vec_free(sa->last_sa_init_req_packet_data); + vec_add(sa->last_sa_init_req_packet_data, ike, len); + + while (p < len && payload!= IKEV2_PAYLOAD_NONE) { + ike_payload_header_t * ikep = (ike_payload_header_t *) &ike->payload[p]; + u32 plen = clib_net_to_host_u16(ikep->length); + + if (plen < sizeof(ike_payload_header_t)) + return; + + if (payload == IKEV2_PAYLOAD_SA) + { + ikev2_sa_free_proposal_vector(&sa->i_proposals); + sa->i_proposals = ikev2_parse_sa_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_KE) + { + ike_ke_payload_header_t * ke = (ike_ke_payload_header_t *) ikep; + sa->dh_group = clib_net_to_host_u16(ke->dh_group); + vec_free(sa->i_dh_data); + vec_add(sa->i_dh_data, ke->payload, plen - sizeof(*ke)); + } + else if (payload == IKEV2_PAYLOAD_NONCE) + { + vec_free(sa->i_nonce); + vec_add(sa->i_nonce, ikep->payload, plen - sizeof(*ikep)); + } + else if (payload == IKEV2_PAYLOAD_NOTIFY) + { + ikev2_notify_t * n = ikev2_parse_notify_payload(ikep); + vec_free(n); + } + else if (payload == IKEV2_PAYLOAD_VENDOR) + { + ikev2_parse_vendor_payload(ikep); + } + else + { + clib_warning("unknown payload %u flags %x length %u", payload, ikep->flags, plen); + if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL) { + ikev2_set_state(sa, IKEV2_STATE_NOTIFY_AND_DELETE); + sa->unsupported_cp = payload; + return; + } + } + + payload = ikep->nextpayload; + p+=plen; + } + + ikev2_set_state(sa, IKEV2_STATE_SA_INIT); +} + +static u8 * +ikev2_decrypt_sk_payload(ikev2_sa_t * sa, ike_header_t * ike, u8 * payload) +{ + int p = 0; + u8 last_payload = 0; + u8 * hmac = 0; + u32 len = clib_net_to_host_u32(ike->length); + ike_payload_header_t * ikep; 
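+  /* The loop below walks the payload chain until the Encrypted (SK) payload,
+   * verifies the trailing integrity checksum - keyed with SK_ai via
+   * ikev2_calc_integr() and computed over the whole message except the
+   * checksum itself, truncated to key_trunc octets - and only then
+   * decrypts the SK payload body. */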
+ u32 plen; + ikev2_sa_transform_t * tr_integ; + tr_integ = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG); + + while (p < len && + *payload != IKEV2_PAYLOAD_NONE && last_payload != IKEV2_PAYLOAD_SK) + { + ikep = (ike_payload_header_t *) &ike->payload[p]; + plen = clib_net_to_host_u16(ikep->length); + + if (plen < sizeof(*ikep)) + return 0; + + if (*payload == IKEV2_PAYLOAD_SK) + { + clib_warning("received IKEv2 payload SK, len %u", plen - 4); + last_payload = *payload; + } + else + { + clib_warning("unknown payload %u flags %x length %u", payload, ikep->flags, plen); + if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL) + { + sa->unsupported_cp = *payload; + return 0; + } + } + + *payload = ikep->nextpayload; + p+=plen; + } + + if (last_payload != IKEV2_PAYLOAD_SK) { + clib_warning("Last payload must be SK"); + return 0; + } + + hmac = ikev2_calc_integr(tr_integ, sa->sk_ai, (u8 *) ike, + len - tr_integ->key_trunc); + + plen = plen - sizeof(*ikep) - tr_integ->key_trunc; + + if (memcmp(hmac, &ikep->payload[plen], tr_integ->key_trunc)) + { + clib_warning("message integrity check failed"); + vec_free(hmac); + return 0; + } + vec_free(hmac); + + return ikev2_decrypt_data(sa, ikep->payload, plen); +} + +static void +ikev2_initial_contact_cleanup (ikev2_sa_t * sa) +{ + ikev2_main_t * km = &ikev2_main; + ikev2_sa_t * tmp; + u32 i, * delete = 0; + ikev2_child_sa_t * c; + + if (!sa->initial_contact) + return; + + /* find old IKE SAs with the same authenticated identity */ + pool_foreach (tmp, km->sas, ({ + if (tmp->i_id.type != sa->i_id.type || + vec_len(tmp->i_id.data) != vec_len(sa->i_id.data) || + memcmp(sa->i_id.data, tmp->i_id.data, vec_len(sa->i_id.data))) + continue; + + if (sa->rspi != tmp->rspi) + vec_add1(delete, tmp - km->sas); + })); + + for (i = 0; i < vec_len(delete); i++) + { + tmp = pool_elt_at_index(km->sas, delete[i]); + vec_foreach(c, tmp->childs) + ikev2_delete_tunnel_interface(km->vnet_main, tmp, c); + ikev2_delete_sa(tmp); + } + + vec_free(delete); + sa->initial_contact = 0; +} + +static void +ikev2_process_auth_req(vlib_main_t * vm, ikev2_sa_t *sa, ike_header_t * ike) +{ + ikev2_child_sa_t * first_child_sa; + int p = 0; + u32 len = clib_net_to_host_u32(ike->length); + u8 payload = ike->nextpayload; + u8 * plaintext = 0; + + ike_payload_header_t * ikep; + u32 plen; + + clib_warning("ispi %lx rspi %lx nextpayload %x version %x " + "exchange %x flags %x msgid %x length %u", + clib_net_to_host_u64(ike->ispi), + clib_net_to_host_u64(ike->rspi), + payload, ike->version, + ike->exchange, ike->flags, + clib_net_to_host_u32(ike->msgid), + len); + + ikev2_calc_keys(sa); + + plaintext = ikev2_decrypt_sk_payload(sa, ike, &payload); + + if (!plaintext) + { + if (sa->unsupported_cp) + ikev2_set_state(sa, IKEV2_STATE_NOTIFY_AND_DELETE); + goto cleanup_and_exit; + } + + /* create 1st child SA */ + ikev2_sa_free_all_child_sa(&sa->childs); + vec_add2(sa->childs, first_child_sa, 1); + + + /* process encrypted payload */ + p = 0; + while (p < vec_len(plaintext) && payload != IKEV2_PAYLOAD_NONE) + { + ikep = (ike_payload_header_t *) &plaintext[p]; + plen = clib_net_to_host_u16(ikep->length); + + if (plen < sizeof(ike_payload_header_t)) + goto cleanup_and_exit; + + if (payload == IKEV2_PAYLOAD_SA) /* 33 */ + { + clib_warning("received payload SA, len %u", plen - sizeof(*ikep)); + ikev2_sa_free_proposal_vector(&first_child_sa->i_proposals); + first_child_sa->i_proposals = ikev2_parse_sa_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_IDI) /* 35 */ + { + 
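+          /* IDi: the initiator's identity.  Its type and value are kept on
+           * the SA so they can later be matched against a configured profile
+           * and fed into the AUTH computation. */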
ike_id_payload_header_t * id = (ike_id_payload_header_t *) ikep; + + sa->i_id.type = id->id_type; + vec_free(sa->i_id.data); + vec_add(sa->i_id.data, id->payload, plen - sizeof(*id)); + + clib_warning("received payload IDi, len %u id_type %u", + plen - sizeof(*id), id->id_type); + } + else if (payload == IKEV2_PAYLOAD_AUTH) /* 39 */ + { + ike_auth_payload_header_t * a = (ike_auth_payload_header_t *) ikep; + + sa->i_auth.method = a->auth_method; + vec_free(sa->i_auth.data); + vec_add(sa->i_auth.data, a->payload, plen - sizeof(*a)); + + clib_warning("received payload AUTH, len %u auth_type %u", + plen - sizeof(*a), a->auth_method); + } + else if (payload == IKEV2_PAYLOAD_NOTIFY) /* 41 */ + { + ikev2_notify_t * n = ikev2_parse_notify_payload(ikep); + if (n->msg_type == IKEV2_NOTIFY_MSG_INITIAL_CONTACT) + { + sa->initial_contact = 1; + } + vec_free(n); + } + else if (payload == IKEV2_PAYLOAD_VENDOR) /* 43 */ + { + ikev2_parse_vendor_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_TSI) /* 44 */ + { + clib_warning("received payload TSi, len %u", plen - sizeof(*ikep)); + + vec_free(first_child_sa->tsi); + first_child_sa->tsi = ikev2_parse_ts_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_TSR) /* 45 */ + { + clib_warning("received payload TSr, len %u", plen - sizeof(*ikep)); + + vec_free(first_child_sa->tsr); + first_child_sa->tsr = ikev2_parse_ts_payload(ikep); + } + else + { + clib_warning("unknown payload %u flags %x length %u data %u", + payload, ikep->flags, plen - 4, + format_hex_bytes, ikep->payload, plen - 4); + + if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL) { + ikev2_set_state(sa, IKEV2_STATE_NOTIFY_AND_DELETE); + sa->unsupported_cp = payload; + return; + } + } + + payload = ikep->nextpayload; + p += plen; + } + +cleanup_and_exit: + vec_free(plaintext); +} + +static void +ikev2_process_informational_req(vlib_main_t * vm, ikev2_sa_t *sa, ike_header_t * ike) +{ + int p = 0; + u32 len = clib_net_to_host_u32(ike->length); + u8 payload = ike->nextpayload; + u8 * plaintext = 0; + + ike_payload_header_t * ikep; + u32 plen; + + clib_warning("ispi %lx rspi %lx nextpayload %x version %x " + "exchange %x flags %x msgid %x length %u", + clib_net_to_host_u64(ike->ispi), + clib_net_to_host_u64(ike->rspi), + payload, ike->version, + ike->exchange, ike->flags, + clib_net_to_host_u32(ike->msgid), + len); + + plaintext = ikev2_decrypt_sk_payload(sa, ike, &payload); + + if (!plaintext) + goto cleanup_and_exit; + + /* process encrypted payload */ + p = 0; + while (p < vec_len(plaintext) && payload != IKEV2_PAYLOAD_NONE) + { + ikep = (ike_payload_header_t *) &plaintext[p]; + plen = clib_net_to_host_u16(ikep->length); + + if (plen < sizeof(ike_payload_header_t)) + goto cleanup_and_exit; + + if (payload == IKEV2_PAYLOAD_NOTIFY) /* 41 */ + { + ikev2_notify_t * n = ikev2_parse_notify_payload(ikep); + if (n->msg_type == IKEV2_NOTIFY_MSG_AUTHENTICATION_FAILED) + ikev2_set_state(sa, IKEV2_STATE_AUTH_FAILED); + vec_free(n); + } + else if (payload == IKEV2_PAYLOAD_DELETE) /* 42 */ + { + sa->del = ikev2_parse_delete_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_VENDOR) /* 43 */ + { + ikev2_parse_vendor_payload(ikep); + } + else + { + clib_warning("unknown payload %u flags %x length %u data %u", + payload, ikep->flags, plen - 4, + format_hex_bytes, ikep->payload, plen - 4); + + if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL) { + sa->unsupported_cp = payload; + return; + } + } + + payload = ikep->nextpayload; + p += plen; + } + +cleanup_and_exit: + vec_free(plaintext); +} + +static void 
+ikev2_process_create_child_sa_req(vlib_main_t * vm, ikev2_sa_t *sa, ike_header_t * ike) +{ + int p = 0; + u32 len = clib_net_to_host_u32(ike->length); + u8 payload = ike->nextpayload; + u8 * plaintext = 0; + u8 rekeying = 0; + u8 i_nonce[IKEV2_NONCE_SIZE]; + + ike_payload_header_t * ikep; + u32 plen; + ikev2_notify_t * n = 0; + ikev2_ts_t * tsi = 0; + ikev2_ts_t * tsr = 0; + ikev2_sa_proposal_t * proposal = 0; + ikev2_child_sa_t * child_sa; + + clib_warning("ispi %lx rspi %lx nextpayload %x version %x " + "exchange %x flags %x msgid %x length %u", + clib_net_to_host_u64(ike->ispi), + clib_net_to_host_u64(ike->rspi), + payload, ike->version, + ike->exchange, ike->flags, + clib_net_to_host_u32(ike->msgid), + len); + + plaintext = ikev2_decrypt_sk_payload(sa, ike, &payload); + + if (!plaintext) + goto cleanup_and_exit; + + /* process encrypted payload */ + p = 0; + while (p < vec_len(plaintext) && payload != IKEV2_PAYLOAD_NONE) + { + ikep = (ike_payload_header_t *) &plaintext[p]; + plen = clib_net_to_host_u16(ikep->length); + + if (plen < sizeof(ike_payload_header_t)) + goto cleanup_and_exit; + + else if (payload == IKEV2_PAYLOAD_SA) + { + proposal = ikev2_parse_sa_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_NOTIFY) + { + n = ikev2_parse_notify_payload(ikep); + if (n->msg_type == IKEV2_NOTIFY_MSG_REKEY_SA) + { + rekeying = 1; + } + } + else if (payload == IKEV2_PAYLOAD_DELETE) + { + sa->del = ikev2_parse_delete_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_VENDOR) + { + ikev2_parse_vendor_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_NONCE) + { + memcpy(i_nonce, ikep->payload, plen - sizeof(*ikep)); + } + else if (payload == IKEV2_PAYLOAD_TSI) + { + tsi = ikev2_parse_ts_payload(ikep); + } + else if (payload == IKEV2_PAYLOAD_TSR) + { + tsr = ikev2_parse_ts_payload(ikep); + } + else + { + clib_warning("unknown payload %u flags %x length %u data %u", + payload, ikep->flags, plen - 4, + format_hex_bytes, ikep->payload, plen - 4); + + if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL) { + sa->unsupported_cp = payload; + return; + } + } + + payload = ikep->nextpayload; + p += plen; + } + + if (rekeying) + { + ikev2_rekey_t * rekey; + child_sa = ikev2_sa_get_child(sa, n->spi, n->protocol_id); + if (!child_sa) + { + clib_warning("child SA spi %lx not found", n->spi); + goto cleanup_and_exit; + } + vec_add2(sa->rekey, rekey, 1); + rekey->protocol_id = n->protocol_id; + rekey->spi = n->spi; + rekey->i_proposal = proposal; + rekey->r_proposal = ikev2_select_proposal(proposal, IKEV2_PROTOCOL_ESP); + rekey->tsi = tsi; + rekey->tsr = tsr; + /* update Ni */ + vec_free(sa->i_nonce); + vec_add(sa->i_nonce, i_nonce, IKEV2_NONCE_SIZE); + /* generate new Nr */ + vec_free(sa->r_nonce); + sa->r_nonce = vec_new(u8, IKEV2_NONCE_SIZE); + RAND_bytes((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE); + } + +cleanup_and_exit: + vec_free(plaintext); + vec_free(n); +} + +static u8 * +ikev2_sa_generate_authmsg(ikev2_sa_t *sa, int is_responder) +{ + u8 * authmsg = 0; + u8 * data; + u8 * nonce; + ikev2_id_t * id; + u8 * key; + u8 * packet_data; + ikev2_sa_transform_t * tr_prf; + + tr_prf = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF); + + if (is_responder) + { + id = &sa->r_id; + key = sa->sk_pr; + nonce = sa->i_nonce; + packet_data = sa->last_sa_init_res_packet_data; + } + else + { + id = &sa->i_id; + key = sa->sk_pi; + nonce = sa->r_nonce; + packet_data = sa->last_sa_init_req_packet_data; + } + + data = vec_new(u8, 4); + data[0] = id->type; + vec_append(data, id->data); + + u8 * id_hash 
= ikev2_calc_prf(tr_prf, key, data); + vec_append(authmsg, packet_data); + vec_append(authmsg, nonce); + vec_append(authmsg, id_hash); + vec_free(id_hash); + vec_free(data); + + return authmsg; +} + +static int +ikev2_ts_cmp(ikev2_ts_t * ts1, ikev2_ts_t * ts2) +{ + if (ts1->ts_type == ts2->ts_type && ts1->protocol_id == ts2->protocol_id && + ts1->start_port == ts2->start_port && ts1->end_port == ts2->end_port && + ts1->start_addr.as_u32 == ts2->start_addr.as_u32 && + ts1->end_addr.as_u32 == ts2->end_addr.as_u32) + return 1; + + return 0; +} + +static void +ikev2_sa_match_ts(ikev2_sa_t *sa) +{ + ikev2_main_t * km = &ikev2_main; + ikev2_profile_t * p; + ikev2_ts_t * ts, * tsi = 0, * tsr = 0; + + pool_foreach (p, km->profiles, ({ + + /* check id */ + if (p->rem_id.type != sa->i_id.type || + vec_len(p->rem_id.data) != vec_len(sa->i_id.data) || + memcmp(p->rem_id.data, sa->i_id.data, vec_len(p->rem_id.data))) + continue; + + vec_foreach(ts, sa->childs[0].tsi) + { + if (ikev2_ts_cmp(&p->rem_ts, ts)) + { + tsi = vec_dup(ts); + break; + } + } + + vec_foreach(ts, sa->childs[0].tsr) + { + if (ikev2_ts_cmp(&p->loc_ts, ts)) + { + tsr = vec_dup(ts); + break; + } + } + + break; + })); + + if (tsi && tsr) + { + vec_free(sa->childs[0].tsi); + vec_free(sa->childs[0].tsr); + sa->childs[0].tsi = tsi; + sa->childs[0].tsr = tsr; + } + else + { + vec_free(tsi); + vec_free(tsr); + ikev2_set_state(sa, IKEV2_STATE_TS_UNACCEPTABLE); + } +} + +static void +ikev2_sa_auth(ikev2_sa_t *sa) +{ + ikev2_main_t * km = &ikev2_main; + ikev2_profile_t * p, * sel_p = 0; + u8 * authmsg, * key_pad, * psk = 0, * auth = 0; + ikev2_sa_transform_t * tr_prf; + + tr_prf = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF); + + /* only shared key and rsa signature */ + if (!(sa->i_auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC || + sa->i_auth.method == IKEV2_AUTH_METHOD_RSA_SIG)) + { + clib_warning("unsupported authentication method %u", sa->i_auth.method); + ikev2_set_state(sa, IKEV2_STATE_AUTH_FAILED); + return; + } + + key_pad = format(0, "%s", IKEV2_KEY_PAD); + authmsg = ikev2_sa_generate_authmsg(sa, 0); + + pool_foreach (p, km->profiles, ({ + + /* check id */ + if (p->rem_id.type != sa->i_id.type || + vec_len(p->rem_id.data) != vec_len(sa->i_id.data) || + memcmp(p->rem_id.data, sa->i_id.data, vec_len(p->rem_id.data))) + continue; + + if (sa->i_auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC) + { + if (!p->auth.data || + p->auth.method != IKEV2_AUTH_METHOD_SHARED_KEY_MIC) + continue; + + psk = ikev2_calc_prf(tr_prf, p->auth.data, key_pad); + auth = ikev2_calc_prf(tr_prf, psk, authmsg); + + if (!memcmp(auth, sa->i_auth.data, vec_len(sa->i_auth.data))) + { + ikev2_set_state(sa, IKEV2_STATE_AUTHENTICATED); + vec_free(auth); + sel_p = p; + break; + } + + } + else if (sa->i_auth.method == IKEV2_AUTH_METHOD_RSA_SIG) + { + if (p->auth.method != IKEV2_AUTH_METHOD_RSA_SIG) + continue; + + if (ikev2_verify_sign(p->auth.key, sa->i_auth.data, authmsg) == 1) + { + ikev2_set_state(sa, IKEV2_STATE_AUTHENTICATED); + sel_p = p; + break; + } + } + + vec_free(auth); + vec_free(psk); + })); + + vec_free(authmsg); + + if (sa->state == IKEV2_STATE_AUTHENTICATED) + { + vec_free(sa->r_id.data); + sa->r_id.data = vec_dup(sel_p->loc_id.data); + sa->r_id.type = sel_p->loc_id.type; + + /* generate our auth data */ + authmsg = ikev2_sa_generate_authmsg(sa, 1); + if (sel_p->auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC) + { + sa->r_auth.data = ikev2_calc_prf(tr_prf, psk, authmsg); + sa->r_auth.method = IKEV2_AUTH_METHOD_SHARED_KEY_MIC; + } + 
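+      /* RFC 7296, section 2.15: for shared-key auth (handled above)
+       *   AUTH = prf( prf(Shared Secret, "Key Pad for IKEv2"), <SignedOctets> )
+       * where psk from the verification loop already holds the inner prf.
+       * For RSA (below) the responder signs its own signed octets with the
+       * key loaded via ikev2_set_local_key(). */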
else if (sel_p->auth.method == IKEV2_AUTH_METHOD_RSA_SIG) + { + sa->r_auth.data = ikev2_calc_sign(km->pkey, authmsg); + sa->r_auth.method = IKEV2_AUTH_METHOD_RSA_SIG; + } + vec_free(authmsg); + + /* select transforms for 1st child sa */ + ikev2_sa_free_proposal_vector(&sa->childs[0].r_proposals); + sa->childs[0].r_proposals = ikev2_select_proposal(sa->childs[0].i_proposals, + IKEV2_PROTOCOL_ESP); + } + else + { + ikev2_set_state(sa, IKEV2_STATE_AUTH_FAILED); + } + vec_free(psk); + vec_free(key_pad); +} + +static int +ikev2_create_tunnel_interface(vnet_main_t * vnm, ikev2_sa_t *sa, ikev2_child_sa_t * child) +{ + ipsec_add_del_tunnel_args_t a; + ikev2_sa_transform_t * tr; + u32 hw_if_index; + u8 encr_type = 0; + + if (!child->r_proposals) + { + ikev2_set_state(sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN); + return 1; + } + + a.is_add = 1; + a.local_ip.as_u32 = sa->raddr.as_u32; + a.remote_ip.as_u32 = sa->iaddr.as_u32; + a.local_spi = child->i_proposals[0].spi; + a.remote_spi = child->r_proposals[0].spi; + a.anti_replay = 1; + + tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_ESN); + if (tr) + a.esn = tr->esn_type; + else + a.esn = 0; + + tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + if (tr) + { + if (tr->encr_type == IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC && tr->key_len) + { + switch (tr->key_len) + { + case 16: + encr_type = IPSEC_CRYPTO_ALG_AES_CBC_128; + break; + case 24: + encr_type = IPSEC_CRYPTO_ALG_AES_CBC_192; + break; + case 32: + encr_type = IPSEC_CRYPTO_ALG_AES_CBC_256; + break; + default: + ikev2_set_state(sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN); + return 1; + break; + } + } + else + { + ikev2_set_state(sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN); + return 1; + } + } + else + { + ikev2_set_state(sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN); + return 1; + } + + tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG); + if (tr) + { + if (tr->integ_type != IKEV2_TRANSFORM_INTEG_TYPE_AUTH_HMAC_SHA1_96) + { + ikev2_set_state(sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN); + return 1; + } + } + else + { + ikev2_set_state(sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN); + return 1; + } + + hw_if_index = ipsec_add_del_tunnel_if(vnm, &a); + if (hw_if_index == VNET_API_ERROR_INVALID_VALUE) + { + clib_warning("create tunnel interface failed remote-ip %U remote-spi %u", + format_ip4_address, &sa->raddr, child->r_proposals[0].spi); + ikev2_set_state(sa, IKEV2_STATE_DELETED); + return hw_if_index; + } + + ikev2_calc_child_keys(sa, child); + + ipsec_set_interface_key(vnm, hw_if_index, + IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO, + encr_type, + child->sk_er); + + ipsec_set_interface_key(vnm, hw_if_index, + IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO, + encr_type, + child->sk_ei); + + ipsec_set_interface_key(vnm, hw_if_index, + IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG, + IPSEC_INTEG_ALG_SHA1_96, + child->sk_ar); + + ipsec_set_interface_key(vnm, hw_if_index, + IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG, + IPSEC_INTEG_ALG_SHA1_96, + child->sk_ai); + + return 0; +} + +static int +ikev2_delete_tunnel_interface(vnet_main_t * vnm, ikev2_sa_t *sa, ikev2_child_sa_t * child) +{ + ipsec_add_del_tunnel_args_t a; + + if (!vec_len(child->r_proposals)) + return 0; + + a.is_add = 0; + a.local_ip.as_u32 = sa->raddr.as_u32; + a.remote_ip.as_u32 = sa->iaddr.as_u32; + a.local_spi = child->i_proposals[0].spi; + a.remote_spi = child->r_proposals[0].spi; + + return ipsec_add_del_tunnel_if(vnm, &a); +} + +static u32 +ikev2_generate_resp(ikev2_sa_t *sa, ike_header_t * ike) +{ + v8 * integ = 0; + ike_payload_header_t * ph; + u16 
plen; + u32 tlen = 0; + + ikev2_sa_transform_t * tr_encr, *tr_integ; + tr_encr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + tr_integ = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG); + + ikev2_payload_chain_t * chain = 0; + ikev2_payload_new_chain(chain); + + if (ike->exchange == IKEV2_EXCHANGE_SA_INIT) + { + if (sa->r_proposals == 0) + { + ikev2_payload_add_notify(chain, IKEV2_NOTIFY_MSG_NO_PROPOSAL_CHOSEN, 0); + ikev2_set_state(sa, IKEV2_STATE_NOTIFY_AND_DELETE); + } + else if (sa->dh_group == IKEV2_TRANSFORM_DH_TYPE_NONE) + { + u8 * data = vec_new(u8, 2); + ikev2_sa_transform_t * tr_dh; + tr_dh = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH); + ASSERT(tr_dh && tr_dh->dh_type); + + data[0] = (tr_dh->dh_type >> 8) & 0xff; + data[1] = (tr_dh->dh_type) & 0xff; + + ikev2_payload_add_notify(chain, IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD, data); + vec_free(data); + ikev2_set_state(sa, IKEV2_STATE_NOTIFY_AND_DELETE); + } + else if (sa->state == IKEV2_STATE_NOTIFY_AND_DELETE) + { + u8 * data = vec_new(u8, 1); + + data[0] = sa->unsupported_cp; + ikev2_payload_add_notify(chain, + IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD, + data); + vec_free(data); + } + else + { + ike->rspi = clib_host_to_net_u64(sa->rspi); + ikev2_payload_add_sa(chain, sa->r_proposals); + ikev2_payload_add_ke(chain, sa->dh_group, sa->r_dh_data); + ikev2_payload_add_nonce(chain, sa->r_nonce); + } + } + else if (ike->exchange == IKEV2_EXCHANGE_IKE_AUTH) + { + if (sa->state == IKEV2_STATE_AUTHENTICATED) + { + ikev2_payload_add_id(chain, &sa->r_id, IKEV2_PAYLOAD_IDR); + ikev2_payload_add_auth(chain, &sa->r_auth); + ikev2_payload_add_sa(chain, sa->childs[0].r_proposals); + ikev2_payload_add_ts(chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI); + ikev2_payload_add_ts(chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR); + } + else if (sa->state == IKEV2_STATE_AUTH_FAILED) + { + ikev2_payload_add_notify(chain, IKEV2_NOTIFY_MSG_AUTHENTICATION_FAILED, 0); + ikev2_set_state(sa, IKEV2_STATE_NOTIFY_AND_DELETE); + } + else if (sa->state == IKEV2_STATE_TS_UNACCEPTABLE) + { + ikev2_payload_add_notify(chain, IKEV2_NOTIFY_MSG_TS_UNACCEPTABLE, 0); + ikev2_payload_add_id(chain, &sa->r_id, IKEV2_PAYLOAD_IDR); + ikev2_payload_add_auth(chain, &sa->r_auth); + } + else if (sa->state == IKEV2_STATE_NO_PROPOSAL_CHOSEN) + { + ikev2_payload_add_notify(chain, IKEV2_NOTIFY_MSG_NO_PROPOSAL_CHOSEN, 0); + ikev2_payload_add_id(chain, &sa->r_id, IKEV2_PAYLOAD_IDR); + ikev2_payload_add_auth(chain, &sa->r_auth); + ikev2_payload_add_ts(chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI); + ikev2_payload_add_ts(chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR); + } + else if (sa->state == IKEV2_STATE_NOTIFY_AND_DELETE) + { + u8 * data = vec_new(u8, 1); + + data[0] = sa->unsupported_cp; + ikev2_payload_add_notify(chain, + IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD, + data); + vec_free(data); + } + else + { + ikev2_set_state(sa, IKEV2_STATE_DELETED); + goto done; + } + } + else if (ike->exchange == IKEV2_EXCHANGE_INFORMATIONAL) + { + /* if pending delete */ + if (sa->del) + { + /* The response to a request that deletes the IKE SA is an empty + INFORMATIONAL response. */ + if (sa->del[0].protocol_id == IKEV2_PROTOCOL_IKE) + { + ikev2_set_state(sa, IKEV2_STATE_NOTIFY_AND_DELETE); + } + /* The response to a request that deletes ESP or AH SAs will contain + delete payloads for the paired SAs going in the other direction. 
*/ + else + { + ikev2_payload_add_delete(chain, sa->del); + } + vec_free(sa->del); + sa->del = 0; + } + /* received N(AUTHENTICATION_FAILED) */ + else if (sa->state == IKEV2_STATE_AUTH_FAILED) + { + ikev2_set_state(sa, IKEV2_STATE_DELETED); + goto done; + } + /* received unsupported critical payload */ + else if (sa->unsupported_cp) + { + u8 * data = vec_new(u8, 1); + + data[0] = sa->unsupported_cp; + ikev2_payload_add_notify(chain, + IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD, + data); + vec_free(data); + sa->unsupported_cp = 0; + } + /* else send empty response */ + } + else if (ike->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA) + { + if (sa->rekey) + { + ikev2_payload_add_sa(chain, sa->rekey[0].r_proposal); + ikev2_payload_add_nonce(chain, sa->r_nonce); + ikev2_payload_add_ts(chain, sa->rekey[0].tsi, IKEV2_PAYLOAD_TSI); + ikev2_payload_add_ts(chain, sa->rekey[0].tsr, IKEV2_PAYLOAD_TSR); + vec_del1(sa->rekey, 0); + } + else if (sa->unsupported_cp) + { + u8 * data = vec_new(u8, 1); + + data[0] = sa->unsupported_cp; + ikev2_payload_add_notify(chain, + IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD, + data); + vec_free(data); + sa->unsupported_cp = 0; + } + else + { + ikev2_payload_add_notify(chain, IKEV2_NOTIFY_MSG_NO_ADDITIONAL_SAS, 0); + } + } + + /* IKEv2 header */ + ike->version = IKE_VERSION_2; + ike->flags = IKEV2_HDR_FLAG_RESPONSE; + ike->nextpayload = IKEV2_PAYLOAD_SK; + tlen = sizeof(*ike); + + + if (ike->exchange == IKEV2_EXCHANGE_SA_INIT) + { + tlen += vec_len(chain->data); + ike->nextpayload = chain->first_payload_type; + ike->length = clib_host_to_net_u32(tlen); + memcpy(ike->payload, chain->data, vec_len(chain->data)); + + /* store whole IKE payload - needed for PSK auth */ + vec_free(sa->last_sa_init_res_packet_data); + vec_add(sa->last_sa_init_res_packet_data, ike, tlen); + } + else + { + + ikev2_payload_chain_add_padding(chain, tr_encr->block_size); + + /* SK payload */ + plen = sizeof(*ph); + ph = (ike_payload_header_t *) &ike->payload[0]; + ph->nextpayload = chain->first_payload_type; + ph->flags = 0; + int enc_len = ikev2_encrypt_data(sa, chain->data, ph->payload); + plen += enc_len; + + /* add space for hmac */ + plen += tr_integ->key_trunc; + tlen += plen; + + /* payload and total length */ + ph->length = clib_host_to_net_u16(plen); + ike->length = clib_host_to_net_u32(tlen); + + /* calc integrity data for whole packet except hash itself */ + integ = ikev2_calc_integr(tr_integ, sa->sk_ar, (u8 *) ike, + tlen - tr_integ->key_trunc); + + memcpy(ike->payload + tlen - tr_integ->key_trunc - sizeof(*ike), + integ, tr_integ->key_trunc); + + /* store whole IKE payload - needed for retransmit */ + vec_free(sa->last_res_packet_data); + vec_add(sa->last_res_packet_data, ike, tlen); + } + +done: + ikev2_payload_destroy_chain (chain); + vec_free(integ); + return tlen; +} + +static int +ikev2_retransmit_sa_init (ike_header_t * ike, + ip4_address_t iaddr, + ip4_address_t raddr) +{ + ikev2_main_t * km = &ikev2_main; + ikev2_sa_t * sa; + + pool_foreach (sa, km->sas, ({ + if (sa->ispi == clib_net_to_host_u64(ike->ispi) && + sa->iaddr.as_u32 == iaddr.as_u32 && + sa->raddr.as_u32 == raddr.as_u32) + { + int p = 0; + u32 len = clib_net_to_host_u32(ike->length); + u8 payload = ike->nextpayload; + + while (p < len && payload!= IKEV2_PAYLOAD_NONE) { + ike_payload_header_t * ikep = (ike_payload_header_t *) &ike->payload[p]; + u32 plen = clib_net_to_host_u16(ikep->length); + + if (plen < sizeof(ike_payload_header_t)) + return -1; + + if (payload == IKEV2_PAYLOAD_NONCE) + { + if 
(!memcmp(sa->i_nonce, ikep->payload, plen - sizeof(*ikep))) + { + /* req is retransmit */ + if (sa->state == IKEV2_STATE_SA_INIT) + { + ike_header_t * tmp; + tmp = (ike_header_t*)sa->last_sa_init_res_packet_data; + ike->ispi = tmp->ispi; + ike->rspi = tmp->rspi; + ike->nextpayload = tmp->nextpayload; + ike->version = tmp->version; + ike->exchange = tmp->exchange; + ike->flags = tmp->flags; + ike->msgid = tmp->msgid; + ike->length = tmp->length; + memcpy(ike->payload, tmp->payload, + clib_net_to_host_u32(tmp->length) - sizeof(*ike)); + clib_warning("IKE_SA_INIT retransmit from %U to %U", + format_ip4_address, &raddr, + format_ip4_address, &iaddr); + return 1; + } + /* else ignore req */ + else + { + clib_warning("IKE_SA_INIT ignore from %U to %U", + format_ip4_address, &raddr, + format_ip4_address, &iaddr); + return -1; + } + } + } + payload = ikep->nextpayload; + p+=plen; + } + } + })); + + /* req is not retransmit */ + return 0; +} + +static int +ikev2_retransmit_resp (ikev2_sa_t * sa, ike_header_t * ike) +{ + u32 msg_id = clib_net_to_host_u32(ike->msgid); + + /* new req */ + if (msg_id > sa->last_msg_id) + { + sa->last_msg_id = msg_id; + return 0; + } + /* retransmitted req */ + else if (msg_id == sa->last_msg_id) + { + ike_header_t * tmp; + tmp = (ike_header_t*)sa->last_res_packet_data; + ike->ispi = tmp->ispi; + ike->rspi = tmp->rspi; + ike->nextpayload = tmp->nextpayload; + ike->version = tmp->version; + ike->exchange = tmp->exchange; + ike->flags = tmp->flags; + ike->msgid = tmp->msgid; + ike->length = tmp->length; + memcpy(ike->payload, tmp->payload, + clib_net_to_host_u32(tmp->length) - sizeof(*ike)); + clib_warning("IKE msgid %u retransmit from %U to %U", + msg_id, + format_ip4_address, &sa->raddr, + format_ip4_address, &sa->iaddr); + return 1; + } + /* old req ignore */ + else + { + clib_warning("IKE msgid %u req ignore from %U to %U", + msg_id, + format_ip4_address, &sa->raddr, + format_ip4_address, &sa->iaddr); + return -1; + } +} + +static uword +ikev2_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + ikev2_next_t next_index; + ikev2_main_t * km = &ikev2_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = IKEV2_NEXT_ERROR_DROP; + u32 sw_if_index0; + ip4_header_t * ip40; + udp_header_t * udp0; + ike_header_t * ike0; + ikev2_sa_t * sa0 = 0; + int len = 0; + int r; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ike0 = vlib_buffer_get_current (b0); + vlib_buffer_advance(b0, - sizeof(*udp0)); + udp0 = vlib_buffer_get_current (b0); + vlib_buffer_advance(b0, - sizeof(*ip40)); + ip40 = vlib_buffer_get_current (b0); + + if (ike0->version != IKE_VERSION_2) + { + vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_NOT_IKEV2, 1); + goto dispatch0; + } + + if (ike0->exchange == IKEV2_EXCHANGE_SA_INIT) + { + ikev2_sa_t sa; /* temporary store for SA */ + sa0 = &sa; + memset (sa0, 0, sizeof (*sa0)); + + if (ike0->rspi == 0) + { + sa0->raddr.as_u32 = ip40->dst_address.as_u32; + sa0->iaddr.as_u32 = ip40->src_address.as_u32; + + r = 
ikev2_retransmit_sa_init(ike0, sa0->iaddr, sa0->raddr); + if (r == 1) + { + vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_IKE_SA_INIT_RETRANSMIT, + 1); + len = clib_net_to_host_u32(ike0->length); + goto dispatch0; + } + else if (r == -1) + { + vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_IKE_SA_INIT_IGNORE, + 1); + goto dispatch0; + } + + ikev2_process_sa_init_req(vm, sa0, ike0); + + if (sa0->state == IKEV2_STATE_SA_INIT) + { + ikev2_sa_free_proposal_vector(&sa0->r_proposals); + sa0->r_proposals = ikev2_select_proposal(sa0->i_proposals, + IKEV2_PROTOCOL_IKE); + ikev2_generate_sa_init_data(sa0); + } + + if (sa0->state == IKEV2_STATE_SA_INIT || + sa0->state == IKEV2_STATE_NOTIFY_AND_DELETE) + { + len = ikev2_generate_resp(sa0, ike0); + } + + if (sa0->state == IKEV2_STATE_SA_INIT) + { + /* add SA to the pool */ + pool_get (km->sas, sa0); + memcpy(sa0, &sa, sizeof(*sa0)); + hash_set (km->sa_by_rspi, sa0->rspi, sa0 - km->sas); + } + else + { + ikev2_sa_free_all_vec(sa0); + } + } + } + else if (ike0->exchange == IKEV2_EXCHANGE_IKE_AUTH) + { + uword * p; + p = hash_get(km->sa_by_rspi, clib_net_to_host_u64(ike0->rspi)); + if (p) + { + sa0 = pool_elt_at_index (km->sas, p[0]); + + r = ikev2_retransmit_resp(sa0, ike0); + if (r == 1) + { + vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_IKE_REQ_RETRANSMIT, + 1); + len = clib_net_to_host_u32(ike0->length); + goto dispatch0; + } + else if (r == -1) + { + vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_IKE_REQ_IGNORE, + 1); + goto dispatch0; + } + + ikev2_process_auth_req(vm, sa0, ike0); + ikev2_sa_auth(sa0); + if (sa0->state == IKEV2_STATE_AUTHENTICATED) + { + ikev2_initial_contact_cleanup(sa0); + ikev2_sa_match_ts(sa0); + if (sa0->state != IKEV2_STATE_TS_UNACCEPTABLE) + ikev2_create_tunnel_interface(km->vnet_main, sa0, + &sa0->childs[0]); + } + len = ikev2_generate_resp(sa0, ike0); + } + } + else if (ike0->exchange == IKEV2_EXCHANGE_INFORMATIONAL) + { + uword * p; + p = hash_get(km->sa_by_rspi, clib_net_to_host_u64(ike0->rspi)); + if (p) + { + sa0 = pool_elt_at_index (km->sas, p[0]); + + r = ikev2_retransmit_resp(sa0, ike0); + if (r == 1) + { + vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_IKE_REQ_RETRANSMIT, + 1); + len = clib_net_to_host_u32(ike0->length); + goto dispatch0; + } + else if (r == -1) + { + vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_IKE_REQ_IGNORE, + 1); + goto dispatch0; + } + + ikev2_process_informational_req(vm, sa0, ike0); + if (sa0->del) + { + if (sa0->del[0].protocol_id != IKEV2_PROTOCOL_IKE) + { + ikev2_delete_t * d, * tmp, * resp = 0; + vec_foreach(d, sa0->del) + { + ikev2_child_sa_t * ch_sa; + ch_sa = ikev2_sa_get_child(sa0, d->spi, + d->protocol_id); + if (ch_sa) + { + ikev2_delete_tunnel_interface(km->vnet_main, + sa0, ch_sa); + vec_add2(resp, tmp, 1); + tmp->protocol_id = d->protocol_id; + tmp->spi = ch_sa->r_proposals[0].spi; + ikev2_sa_del_child_sa(sa0, ch_sa); + } + } + vec_free(sa0->del); + sa0->del = resp; + } + } + len = ikev2_generate_resp(sa0, ike0); + } + } + else if (ike0->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA) + { + uword * p; + p = hash_get(km->sa_by_rspi, clib_net_to_host_u64(ike0->rspi)); + if (p) + { + sa0 = pool_elt_at_index (km->sas, p[0]); + + r = ikev2_retransmit_resp(sa0, ike0); + if (r == 1) + { + vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_IKE_REQ_RETRANSMIT, + 1); + len = clib_net_to_host_u32(ike0->length); + goto dispatch0; + } + else if (r == -1) + { + 
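+                  /* msgid is lower than the last one seen on this SA: an old
+                   * duplicate, so count it and let the packet be dropped. */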
vlib_node_increment_counter(vm, ikev2_node.index, + IKEV2_ERROR_IKE_REQ_IGNORE, + 1); + goto dispatch0; + } + + ikev2_process_create_child_sa_req(vm, sa0, ike0); + if (sa0->rekey) + { + if (sa0->rekey[0].protocol_id != IKEV2_PROTOCOL_IKE) + { + ikev2_child_sa_t * child; + vec_add2(sa0->childs, child, 1); + child->r_proposals = sa0->rekey[0].r_proposal; + child->i_proposals = sa0->rekey[0].i_proposal; + child->tsi = sa0->rekey[0].tsi; + child->tsr = sa0->rekey[0].tsr; + ikev2_create_tunnel_interface(km->vnet_main, sa0, + child); + } + len = ikev2_generate_resp(sa0, ike0); + } + } + } + else + { + clib_warning("IKEv2 exchange %u packet received from %U to %U", + ike0->exchange, + format_ip4_address, ip40->src_address.as_u8, + format_ip4_address, ip40->dst_address.as_u8); + hexdump((u8 *) ip40, b0->current_length); + } + +dispatch0: + /* if we are sending packet back, rewrite headers */ + if (len) + { + next0 = IKEV2_NEXT_IP4_LOOKUP; + ip40->dst_address.as_u32 = sa0->iaddr.as_u32; + ip40->src_address.as_u32 = sa0->raddr.as_u32; + udp0->length = clib_host_to_net_u16(len + sizeof(udp_header_t)); + udp0->checksum = 0; + b0->current_length = len + sizeof(ip4_header_t) + sizeof(udp_header_t); + ip40->length = clib_host_to_net_u16(b0->current_length); + ip40->checksum = ip4_header_checksum (ip40); +#if 0 + clib_warning("sending response:"); + hexdump(vlib_buffer_get_current (b0), b0->current_length); +#endif + } + /* delete sa */ + if (sa0 && (sa0->state == IKEV2_STATE_DELETED || + sa0->state == IKEV2_STATE_NOTIFY_AND_DELETE)) + { + ikev2_child_sa_t * c; + + vec_foreach(c, sa0->childs) + ikev2_delete_tunnel_interface(km->vnet_main, sa0, c); + + ikev2_delete_sa(sa0); + } + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ikev2_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ikev2_node.index, + IKEV2_ERROR_PROCESSED, frame->n_vectors); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ikev2_node) = { + .function = ikev2_node_fn, + .name = "ikev2", + .vector_size = sizeof (u32), + .format_trace = format_ikev2_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ikev2_error_strings), + .error_strings = ikev2_error_strings, + + .n_next_nodes = IKEV2_N_NEXT, + + .next_nodes = { + [IKEV2_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IKEV2_NEXT_ERROR_DROP] = "error-drop", + }, +}; + + +static ikev2_profile_t * +ikev2_profile_index_by_name(u8 * name) +{ + ikev2_main_t * km = &ikev2_main; + uword * p; + + p = mhash_get (&km->profile_index_by_name, name); + if (!p) + return 0; + + return pool_elt_at_index(km->profiles, p[0]); +} + +clib_error_t * +ikev2_set_local_key(vlib_main_t * vm, u8 * file) +{ + ikev2_main_t * km = &ikev2_main; + + km->pkey = ikev2_load_key_file(file); + if (km->pkey == NULL) + return clib_error_return(0, "load key '%s' failed", file); + + return 0; +} + +clib_error_t * +ikev2_add_del_profile(vlib_main_t * vm, u8 * name, int is_add) +{ + ikev2_main_t * km = &ikev2_main; + ikev2_profile_t * p; + + if (is_add) + { + if (ikev2_profile_index_by_name(name)) + return clib_error_return(0, "policy %v already exists", name); + + pool_get (km->profiles, p); + memset(p, 0, sizeof(*p)); + p->name = 
vec_dup(name); + uword index = p - km->profiles; + mhash_set_mem (&km->profile_index_by_name, name, &index, 0); + } + else + { + p = ikev2_profile_index_by_name(name); + if (!p) + return clib_error_return(0, "policy %v does not exists", name); + + vec_free (p->name); + pool_put (km->profiles, p); + mhash_unset (&km->profile_index_by_name, name, 0); + } + return 0; +} + +clib_error_t * +ikev2_set_profile_auth(vlib_main_t * vm, u8 * name, u8 auth_method, + u8 * auth_data, u8 data_hex_format) +{ + ikev2_profile_t * p; + clib_error_t * r; + + p = ikev2_profile_index_by_name(name); + + if (!p) { + r = clib_error_return(0, "unknown profile %v", name); + return r; + } + vec_free(p->auth.data); + p->auth.method = auth_method; + p->auth.data = vec_dup(auth_data); + p->auth.hex = data_hex_format; + + if (auth_method == IKEV2_AUTH_METHOD_RSA_SIG) + { + if (p->auth.key) + EVP_PKEY_free(p->auth.key); + p->auth.key = ikev2_load_cert_file(auth_data); + if (p->auth.key == NULL) + return clib_error_return(0, "load cert '%s' failed", auth_data); + } + + return 0; +} + +clib_error_t * +ikev2_set_profile_id(vlib_main_t * vm, u8 * name, u8 id_type, u8 * data, + int is_local) +{ + ikev2_profile_t * p; + clib_error_t * r; + + if (id_type > IKEV2_ID_TYPE_ID_RFC822_ADDR && id_type < IKEV2_ID_TYPE_ID_KEY_ID) + { + r = clib_error_return(0, "unsupported identity type %U", + format_ikev2_id_type, id_type); + return r; + } + + p = ikev2_profile_index_by_name(name); + + if (!p) { + r = clib_error_return(0, "unknown profile %v", name); + return r; + } + + if (is_local) + { + vec_free(p->loc_id.data); + p->loc_id.type = id_type; + p->loc_id.data = vec_dup(data); + } + else + { + vec_free(p->rem_id.data); + p->rem_id.type = id_type; + p->rem_id.data = vec_dup(data); + } + + return 0; +} + +clib_error_t * +ikev2_set_profile_ts(vlib_main_t * vm, u8 * name, u8 protocol_id, + u16 start_port, u16 end_port, ip4_address_t start_addr, + ip4_address_t end_addr, int is_local) +{ + ikev2_profile_t * p; + clib_error_t * r; + + p = ikev2_profile_index_by_name(name); + + if (!p) { + r = clib_error_return(0, "unknown profile %v", name); + return r; + } + + if (is_local) + { + p->loc_ts.start_addr.as_u32= start_addr.as_u32; + p->loc_ts.end_addr.as_u32 = end_addr.as_u32; + p->loc_ts.start_port = start_port; + p->loc_ts.end_port = end_port; + p->loc_ts.protocol_id = protocol_id; + p->loc_ts.ts_type = 7; + } + else + { + p->rem_ts.start_addr.as_u32 = start_addr.as_u32; + p->rem_ts.end_addr.as_u32 = end_addr.as_u32; + p->rem_ts.start_port = start_port; + p->rem_ts.end_port = end_port; + p->rem_ts.protocol_id = protocol_id; + p->rem_ts.ts_type = 7; + } + + return 0; +} + + +clib_error_t * +ikev2_init (vlib_main_t * vm) +{ + ikev2_main_t * km = &ikev2_main; + clib_error_t * error; + + memset (km, 0, sizeof (ikev2_main_t)); + km->vnet_main = vnet_get_main(); + km->vlib_main = vm; + + ikev2_crypto_init(km); + + km->sa_by_rspi = hash_create (0, sizeof (uword)); + mhash_init_vec_string (&km->profile_index_by_name, sizeof (uword)); + + if ((error = vlib_call_init_function (vm, ikev2_cli_init))) + return error; + + udp_register_dst_port (vm, 500, ikev2_node.index, 1); + + return 0; +} + + diff --git a/vnet/vnet/ipsec/ikev2.h b/vnet/vnet/ipsec/ikev2.h new file mode 100644 index 00000000000..fd0d75a62f3 --- /dev/null +++ b/vnet/vnet/ipsec/ikev2.h @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ikev2_h__ +#define __included_ikev2_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> + +#include <vppinfra/error.h> + +#define IKEV2_NONCE_SIZE 32 + +#define IKEV2_KEY_PAD "Key Pad for IKEv2" + +typedef u8 v8; + +vlib_node_registration_t ikev2_node; + +typedef CLIB_PACKED (struct { + u64 ispi; + u64 rspi; + u8 nextpayload; + u8 version; + u8 exchange; + u8 flags; + u32 msgid; + u32 length; + u8 payload[0]; +}) ike_header_t; + +typedef CLIB_PACKED (struct { + u8 nextpayload; + u8 flags; + u16 length; + u16 dh_group; + u8 reserved[2]; + u8 payload[0]; +}) ike_ke_payload_header_t; + +typedef CLIB_PACKED (struct { + u8 nextpayload; + u8 flags; + u16 length; + u8 payload[0]; +}) ike_payload_header_t; + +typedef CLIB_PACKED (struct { + u8 nextpayload; + u8 flags; + u16 length; + u8 auth_method; + u8 reserved[3]; + u8 payload[0]; +}) ike_auth_payload_header_t; + +typedef CLIB_PACKED (struct { + u8 nextpayload; + u8 flags; + u16 length; + u8 id_type; + u8 reserved[3]; + u8 payload[0]; +}) ike_id_payload_header_t; + +#define IKE_VERSION_2 0x20 + +#define IKEV2_EXCHANGE_SA_INIT 34 +#define IKEV2_EXCHANGE_IKE_AUTH 35 +#define IKEV2_EXCHANGE_CREATE_CHILD_SA 36 +#define IKEV2_EXCHANGE_INFORMATIONAL 37 + +#define IKEV2_HDR_FLAG_INITIATOR (1<<3) +#define IKEV2_HDR_FLAG_VERSION (1<<4) +#define IKEV2_HDR_FLAG_RESPONSE (1<<5) + +#define IKEV2_PAYLOAD_FLAG_CRITICAL (1<<7) + +#define IKEV2_PAYLOAD_NONE 0 +#define IKEV2_PAYLOAD_SA 33 +#define IKEV2_PAYLOAD_KE 34 +#define IKEV2_PAYLOAD_IDI 35 +#define IKEV2_PAYLOAD_IDR 36 +#define IKEV2_PAYLOAD_AUTH 39 +#define IKEV2_PAYLOAD_NONCE 40 +#define IKEV2_PAYLOAD_NOTIFY 41 +#define IKEV2_PAYLOAD_DELETE 42 +#define IKEV2_PAYLOAD_VENDOR 43 +#define IKEV2_PAYLOAD_TSI 44 +#define IKEV2_PAYLOAD_TSR 45 +#define IKEV2_PAYLOAD_SK 46 + +typedef enum { + IKEV2_PROTOCOL_IKE = 1, + IKEV2_PROTOCOL_AH = 2, + IKEV2_PROTOCOL_ESP = 3, +} ikev2_protocol_id_t; + +#define foreach_ikev2_notify_msg_type \ + _( 0, NONE) \ + _( 1, UNSUPPORTED_CRITICAL_PAYLOAD) \ + _( 4, INVALID_IKE_SPI) \ + _( 5, INVALID_MAJOR_VERSION) \ + _( 7, INVALID_SYNTAX) \ + _( 8, INVALID_MESSAGE_ID) \ + _( 11, INVALID_SPI) \ + _( 14, NO_PROPOSAL_CHOSEN) \ + _( 17, INVALID_KE_PAYLOAD) \ + _( 24, AUTHENTICATION_FAILED) \ + _( 34, SINGLE_PAIR_REQUIRED) \ + _( 35, NO_ADDITIONAL_SAS) \ + _( 36, INTERNAL_ADDRESS_FAILURE) \ + _( 37, FAILED_CP_REQUIRED) \ + _( 38, TS_UNACCEPTABLE) \ + _( 39, INVALID_SELECTORS) \ + _( 40, UNACCEPTABLE_ADDRESSES) \ + _( 41, UNEXPECTED_NAT_DETECTED) \ + _( 42, USE_ASSIGNED_HoA) \ + _( 43, TEMPORARY_FAILURE) \ + _( 44, CHILD_SA_NOT_FOUND) \ + _( 45, INVALID_GROUP_ID) \ + _( 46, AUTHORIZATION_FAILED) \ + _(16384, INITIAL_CONTACT) \ + _(16385, SET_WINDOW_SIZE) \ + _(16386, ADDITIONAL_TS_POSSIBLE) \ + _(16387, IPCOMP_SUPPORTED) \ + _(16388, NAT_DETECTION_SOURCE_IP) \ + _(16389, NAT_DETECTION_DESTINATION_IP) \ + _(16390, COOKIE) \ + _(16391, USE_TRANSPORT_MODE) \ + _(16392, HTTP_CERT_LOOKUP_SUPPORTED) \ + _(16393, 
REKEY_SA) \ + _(16394, ESP_TFC_PADDING_NOT_SUPPORTED) \ + _(16395, NON_FIRST_FRAGMENTS_ALSO) \ + _(16396, MOBIKE_SUPPORTED) \ + _(16397, ADDITIONAL_IP4_ADDRESS) \ + _(16398, ADDITIONAL_IP6_ADDRESS) \ + _(16399, NO_ADDITIONAL_ADDRESSES) \ + _(16400, UPDATE_SA_ADDRESSES) \ + _(16401, COOKIE2) \ + _(16402, NO_NATS_ALLOWED) \ + _(16403, AUTH_LIFETIME) \ + _(16404, MULTIPLE_AUTH_SUPPORTED) \ + _(16405, ANOTHER_AUTH_FOLLOWS) \ + _(16406, REDIRECT_SUPPORTED) \ + _(16407, REDIRECT) \ + _(16408, REDIRECTED_FROM) \ + _(16409, TICKET_LT_OPAQUE) \ + _(16410, TICKET_REQUEST) \ + _(16411, TICKET_ACK) \ + _(16412, TICKET_NACK) \ + _(16413, TICKET_OPAQUE) \ + _(16414, LINK_ID) \ + _(16415, USE_WESP_MODE) \ + _(16416, ROHC_SUPPORTED) \ + _(16417, EAP_ONLY_AUTHENTICATION) \ + _(16418, CHILDLESS_IKEV2_SUPPORTED) \ + _(16419, QUICK_CRASH_DETECTION) \ + _(16420, IKEV2_MESSAGE_ID_SYNC_SUPPORTED) \ + _(16421, IPSEC_REPLAY_COUNTER_SYNC_SUPPORTED) \ + _(16422, IKEV2_MESSAGE_ID_SYNC) \ + _(16423, IPSEC_REPLAY_COUNTER_SYNC) \ + _(16424, SECURE_PASSWORD_METHODS) \ + _(16425, PSK_PERSIST) \ + _(16426, PSK_CONFIRM) \ + _(16427, ERX_SUPPORTED) \ + _(16428, IFOM_CAPABILITY) \ + _(16429, SENDER_REQUEST_ID) \ + _(16430, IKEV2_FRAGMENTATION_SUPPORTED) \ + _(16431, SIGNATURE_HASH_ALGORITHMS) + + +typedef enum { +#define _(v,f) IKEV2_NOTIFY_MSG_##f = v, + foreach_ikev2_notify_msg_type +#undef _ +} ikev2_notify_msg_type_t; + +#define foreach_ikev2_transform_type \ + _(0, UNDEFINED, "undefinded") \ + _(1, ENCR, "encr") \ + _(2, PRF, "prf") \ + _(3, INTEG, "integ") \ + _(4, DH, "dh-group") \ + _(5, ESN, "esn") + +typedef enum { +#define _(v,f,s) IKEV2_TRANSFORM_TYPE_##f = v, + foreach_ikev2_transform_type +#undef _ + IKEV2_TRANSFORM_NUM_TYPES +} ikev2_transform_type_t; + + +#define foreach_ikev2_transform_encr_type \ + _(1 , DES_IV64, "des-iv64") \ + _(2 , DES, "des") \ + _(3 , 3DES, "3des") \ + _(4 , RC5, "rc5") \ + _(5 , IDEA, "idea") \ + _(6 , CAST, "cast") \ + _(7 , BLOWFISH, "blowfish") \ + _(8 , 3IDEA, "3idea") \ + _(9 , DES_IV32, "des-iv32") \ + _(11, NULL, "null") \ + _(12, AES_CBC, "aes-cbc") \ + _(13, AES_CTR, "aes-ctr") + +typedef enum { +#define _(v,f,str) IKEV2_TRANSFORM_ENCR_TYPE_##f = v, + foreach_ikev2_transform_encr_type +#undef _ +} ikev2_transform_encr_type_t; + +#define foreach_ikev2_transform_prf_type \ + _(1, PRF_HMAC_MD5, "hmac-md5") \ + _(2, PRF_HMAC_SHA1, "hmac-sha1") \ + _(3, PRF_MAC_TIGER, "mac-tiger") \ + _(4, PRF_AES128_XCBC, "aes128-xcbc") \ + _(5, PRF_HMAC_SHA2_256, "hmac-sha2-256") \ + _(6, PRF_HMAC_SHA2_384, "hmac-sha2-384") \ + _(7, PRF_HMAC_SHA2_512, "hmac-sha2-512") \ + _(8, PRF_AES128_CMAC, "aes128-cmac") + +typedef enum { +#define _(v,f,str) IKEV2_TRANSFORM_PRF_TYPE_##f = v, + foreach_ikev2_transform_prf_type +#undef _ +} ikev2_transform_prf_type_t; + +#define foreach_ikev2_transform_integ_type \ + _(0, NONE, "none") \ + _(1, AUTH_HMAC_MD5_96, "md5-96") \ + _(2, AUTH_HMAC_SHA1_96, "sha1-96") \ + _(3, AUTH_DES_MAC, "des-mac") \ + _(4, AUTH_KPDK_MD5, "kpdk-md5") \ + _(5, AUTH_AES_XCBC_96, "aes-xcbc-96") \ + _(6, AUTH_HMAC_MD5_128, "md5-128") \ + _(7, AUTH_HMAC_SHA1_160, "sha1-160") \ + _(8, AUTH_AES_CMAC_96, "cmac-96") \ + _(9, AUTH_AES_128_GMAC, "aes-128-gmac") \ + _(10, AUTH_AES_192_GMAC, "aes-192-gmac") \ + _(11, AUTH_AES_256_GMAC, "aes-256-gmac") \ + _(12, AUTH_HMAC_SHA2_256_128, "hmac-sha2-256-128") \ + _(13, AUTH_HMAC_SHA2_384_192, "hmac-sha2-384-192") \ + _(14, AUTH_HMAC_SHA2_512_256, "hmac-sha2-512-256") + +typedef enum { +#define _(v,f, str) IKEV2_TRANSFORM_INTEG_TYPE_##f = v, + 
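+/* The foreach_* lists are X-macros: with the local definition of _ above, an
+ * entry such as _(2, AUTH_HMAC_SHA1_96, "sha1-96") expands to
+ *   IKEV2_TRANSFORM_INTEG_TYPE_AUTH_HMAC_SHA1_96 = 2,
+ * so the same list can also drive the corresponding format helper. */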
foreach_ikev2_transform_integ_type +#undef _ +} ikev2_transform_integ_type_t; + +#if defined(OPENSSL_NO_CISCO_FECDH) +#define foreach_ikev2_transform_dh_type \ + _(0, NONE, "none") \ + _(1, MODP_768, "modp-768") \ + _(2, MODP_1024, "modp-1024") \ + _(5, MODP_1536, "modp-1536") \ + _(14, MODP_2048, "modp-2048") \ + _(15, MODP_3072, "modp-3072") \ + _(16, MODP_4096, "modp-4096") \ + _(17, MODP_6144, "modp-6144") \ + _(18, MODP_8192, "modp-8192") \ + _(19, ECP_256, "ecp-256") \ + _(20, ECP_384, "ecp-384") \ + _(21, ECP_521, "ecp-521") \ + _(22, MODP_1024_160, "modp-1024-160") \ + _(23, MODP_2048_224, "modp-2048-224") \ + _(24, MODP_2048_256, "modp-2048-256") \ + _(25, ECP_192, "ecp-192") \ + _(26, ECP_224, "ecp-224") \ + _(27, BRAINPOOL_224, "brainpool-224") \ + _(28, BRAINPOOL_256, "brainpool-256") \ + _(29, BRAINPOOL_384, "brainpool-384") \ + _(30, BRAINPOOL_512, "brainpool-512") +#else +#define foreach_ikev2_transform_dh_type \ + _(0, NONE, "none") \ + _(1, MODP_768, "modp-768") \ + _(2, MODP_1024, "modp-1024") \ + _(5, MODP_1536, "modp-1536") \ + _(14, MODP_2048, "modp-2048") \ + _(15, MODP_3072, "modp-3072") \ + _(16, MODP_4096, "modp-4096") \ + _(17, MODP_6144, "modp-6144") \ + _(18, MODP_8192, "modp-8192") \ + _(19, ECP_256, "ecp-256") \ + _(20, ECP_384, "ecp-384") \ + _(21, ECP_521, "ecp-521") \ + _(22, MODP_1024_160, "modp-1024-160") \ + _(23, MODP_2048_224, "modp-2048-224") \ + _(24, MODP_2048_256, "modp-2048-256") \ + _(25, ECP_192, "ecp-192") +#endif + +typedef enum { +#define _(v,f, str) IKEV2_TRANSFORM_DH_TYPE_##f = v, + foreach_ikev2_transform_dh_type +#undef _ +} ikev2_transform_dh_type_t; + +#define foreach_ikev2_transform_esn_type \ + _(0, NO_ESN, "no") \ + _(1, ESN, "yes") + +typedef enum { +#define _(v,f,str) IKEV2_TRANSFORM_ESN_TYPE_##f = v, + foreach_ikev2_transform_esn_type +#undef _ +} ikev2_transform_esn_type_t; + +#define foreach_ikev2_auth_method \ + _( 1, RSA_SIG, "rsa-sig") \ + _( 2, SHARED_KEY_MIC, "shared-key-mic") + +typedef enum { +#define _(v,f,s) IKEV2_AUTH_METHOD_##f = v, + foreach_ikev2_auth_method +#undef _ +} ikev2_auth_method_t; + +#define foreach_ikev2_id_type \ + _( 1, ID_IPV4_ADDR, "ip4-addr") \ + _( 2, ID_FQDN, "fqdn") \ + _( 3, ID_RFC822_ADDR, "rfc822") \ + _( 5, ID_IPV6_ADDR, "ip6-addr") \ + _( 9, ID_DER_ASN1_DN, "der-asn1-dn") \ + _(10, ID_DER_ASN1_GN, "der-asn1-gn") \ + _(11, ID_KEY_ID, "key-id") + +typedef enum { +#define _(v,f,s) IKEV2_ID_TYPE_##f = v, + foreach_ikev2_id_type +#undef _ +} ikev2_id_type_t; + +clib_error_t * ikev2_init (vlib_main_t * vm); +clib_error_t * ikev2_set_local_key(vlib_main_t * vm, u8 * file); +clib_error_t * ikev2_add_del_profile(vlib_main_t * vm, u8 * name, int is_add); +clib_error_t * ikev2_set_profile_auth(vlib_main_t * vm, u8 * name, + u8 auth_method, u8 * data, + u8 data_hex_format); +clib_error_t * ikev2_set_profile_id(vlib_main_t * vm, u8 * name, + u8 id_type, u8 * data, int is_local); +clib_error_t * ikev2_set_profile_ts(vlib_main_t * vm, u8 * name, u8 protocol_id, + u16 start_port, u16 end_port, + ip4_address_t start_addr, + ip4_address_t end_addr, int is_local); +/* ikev2_format.c */ +u8 * format_ikev2_auth_method (u8 * s, va_list * args); +u8 * format_ikev2_id_type (u8 * s, va_list * args); +u8 * format_ikev2_transform_type (u8 * s, va_list * args); +u8 * format_ikev2_notify_msg_type (u8 * s, va_list * args); +u8 * format_ikev2_transform_encr_type(u8 * s, va_list * args); +u8 * format_ikev2_transform_prf_type(u8 * s, va_list * args); +u8 * format_ikev2_transform_integ_type(u8 * s, va_list * args); +u8 * 
format_ikev2_transform_dh_type(u8 * s, va_list * args); +u8 * format_ikev2_transform_esn_type(u8 * s, va_list * args); +u8 * format_ikev2_sa_transform(u8 * s, va_list * args); + +uword unformat_ikev2_auth_method (unformat_input_t * input, va_list * args); +uword unformat_ikev2_id_type (unformat_input_t * input, va_list * args); +uword unformat_ikev2_transform_type (unformat_input_t * input, va_list * args); +uword unformat_ikev2_transform_encr_type (unformat_input_t * input, va_list * args); +uword unformat_ikev2_transform_prf_type (unformat_input_t * input, va_list * args); +uword unformat_ikev2_transform_integ_type (unformat_input_t * input, va_list * args); +uword unformat_ikev2_transform_dh_type (unformat_input_t * input, va_list * args); +uword unformat_ikev2_transform_esn_type (unformat_input_t * input, va_list * args); + +#endif /* __included_ikev2_h__ */ + diff --git a/vnet/vnet/ipsec/ikev2_cli.c b/vnet/vnet/ipsec/ikev2_cli.c new file mode 100644 index 00000000000..1e6009f0806 --- /dev/null +++ b/vnet/vnet/ipsec/ikev2_cli.c @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/udp.h> +#include <vnet/ipsec/ikev2.h> +#include <vnet/ipsec/ikev2_priv.h> + +u8 * +format_ikev2_id_type_and_data (u8 * s, va_list * args) +{ + ikev2_id_t * id = va_arg (*args, ikev2_id_t *); + + if (id->type == 0 || vec_len(id->data) == 0) + return format(s, "none"); + + s = format(s, "%U", format_ikev2_id_type, id->type); + + if (id->type == IKEV2_ID_TYPE_ID_FQDN || + id->type == IKEV2_ID_TYPE_ID_RFC822_ADDR) + { + s = format(s, " %v", id->data); + } + else + { + s = format(s, " %U", format_hex_bytes, &id->data, (uword) (vec_len(id->data))); + } + + return s; +} + + +static clib_error_t * +show_ikev2_sa_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ikev2_main_t * km = &ikev2_main; + ikev2_sa_t * sa; + ikev2_ts_t * ts; + ikev2_child_sa_t * child; + ikev2_sa_transform_t * tr; + + pool_foreach (sa, km->sas, ({ + u8 * s = 0; + vlib_cli_output(vm, " iip %U ispi %lx rip %U rspi %lx", + format_ip4_address, &sa->iaddr, sa->ispi, + format_ip4_address, &sa->raddr, sa->rspi); + + tr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + s = format(s, "%U ", format_ikev2_sa_transform, tr); + + tr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF); + s = format(s, "%U ", format_ikev2_sa_transform, tr); + + tr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG); + s = format(s, "%U ", format_ikev2_sa_transform, tr); + + tr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH); + s = format(s, "%U ", format_ikev2_sa_transform, tr); + + vlib_cli_output(vm, " %v", s); + vec_free(s); + + vlib_cli_output(vm, " nonce i:%U\n r:%U", + format_hex_bytes, sa->i_nonce, vec_len(sa->i_nonce), + format_hex_bytes, sa->r_nonce, 
vec_len(sa->r_nonce)); + + vlib_cli_output(vm, " SK_d %U", + format_hex_bytes, sa->sk_d, vec_len(sa->sk_d)); + vlib_cli_output(vm, " SK_a i:%U\n r:%U", + format_hex_bytes, sa->sk_ai, vec_len(sa->sk_ai), + format_hex_bytes, sa->sk_ar, vec_len(sa->sk_ar)); + vlib_cli_output(vm, " SK_e i:%U\n r:%U", + format_hex_bytes, sa->sk_ei, vec_len(sa->sk_ei), + format_hex_bytes, sa->sk_er, vec_len(sa->sk_er)); + vlib_cli_output(vm, " SK_p i:%U\n r:%U", + format_hex_bytes, sa->sk_pi, vec_len(sa->sk_pi), + format_hex_bytes, sa->sk_pr, vec_len(sa->sk_pr)); + + vlib_cli_output(vm, " identifier (i) %U", + format_ikev2_id_type_and_data, &sa->i_id); + vlib_cli_output(vm, " identifier (r) %U", + format_ikev2_id_type_and_data, &sa->r_id); + + vec_foreach(child, sa->childs) + { + vlib_cli_output(vm, " child sa %u:", child - sa->childs); + + tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + s = format(s, "%U ", format_ikev2_sa_transform, tr); + + tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG); + s = format(s, "%U ", format_ikev2_sa_transform, tr); + + tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_ESN); + s = format(s, "%U ", format_ikev2_sa_transform, tr); + + vlib_cli_output(vm, " %v", s); + vec_free(s); + + vlib_cli_output(vm, " spi(i) %lx spi(r) %lx", + child->i_proposals ? child->i_proposals[0].spi : 0, + child->r_proposals ? child->r_proposals[0].spi : 0); + + vlib_cli_output(vm, " SK_e i:%U\n r:%U", + format_hex_bytes, child->sk_ei, vec_len(child->sk_ei), + format_hex_bytes, child->sk_er, vec_len(child->sk_er)); + vlib_cli_output(vm, " SK_a i:%U\n r:%U", + format_hex_bytes, child->sk_ai, vec_len(child->sk_ai), + format_hex_bytes, child->sk_ar, vec_len(child->sk_ar)); + vlib_cli_output(vm, " traffic selectors (i):"); + vec_foreach(ts, child->tsi) + { + vlib_cli_output(vm, " %u type %u protocol_id %u addr " + "%U - %U port %u - %u", + ts - child->tsi, + ts->ts_type, ts->protocol_id, + format_ip4_address, &ts->start_addr, + format_ip4_address, &ts->end_addr, + clib_net_to_host_u16( ts->start_port), + clib_net_to_host_u16( ts->end_port)); + } + vlib_cli_output(vm, " traffic selectors (r):"); + vec_foreach(ts, child->tsr) + { + vlib_cli_output(vm, " %u type %u protocol_id %u addr " + "%U - %U port %u - %u", + ts - child->tsr, + ts->ts_type, ts->protocol_id, + format_ip4_address, &ts->start_addr, + format_ip4_address, &ts->end_addr, + clib_net_to_host_u16( ts->start_port), + clib_net_to_host_u16( ts->end_port)); + } + } + vlib_cli_output(vm, ""); + })); + return 0; +} + +VLIB_CLI_COMMAND (show_ikev2_sa_command, static) = { + .path = "show ikev2 sa", + .short_help = "show ikev2 sa", + .function = show_ikev2_sa_command_fn, +}; + +static clib_error_t * +ikev2_profile_add_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + u8 * name = 0; + clib_error_t * r = 0; + u32 id_type; + u8 * data = 0; + u32 tmp1, tmp2, tmp3; + ip4_address_t ip4; + ip4_address_t end_addr; + + const char * valid_chars = "a-zA-Z0-9_"; + + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "add %U", unformat_token, valid_chars, &name)) + { + r = ikev2_add_del_profile(vm, name, 1); + goto done; + } + else if (unformat (line_input, "del %U", unformat_token, valid_chars, &name)) + { + r = ikev2_add_del_profile(vm, name, 0); + goto done; + } + else if (unformat (line_input, "set %U auth shared-key-mic string %v", + unformat_token, valid_chars, &name, &data)) + { + r = ikev2_set_profile_auth(vm, name, IKEV2_AUTH_METHOD_SHARED_KEY_MIC, + data, 0); + goto done; + } + else if (unformat (line_input, "set %U auth shared-key-mic hex %U", + unformat_token, valid_chars, &name, + unformat_hex_string, &data)) + { + r = ikev2_set_profile_auth(vm, name, IKEV2_AUTH_METHOD_SHARED_KEY_MIC, + data, 1); + goto done; + } + else if (unformat (line_input, "set %U auth rsa-sig cert-file %v", + unformat_token, valid_chars, &name, + &data)) + { + r = ikev2_set_profile_auth(vm, name, IKEV2_AUTH_METHOD_RSA_SIG, data, 0); + goto done; + } + else if (unformat (line_input, "set %U id local %U %U", + unformat_token, valid_chars, &name, + unformat_ikev2_id_type, &id_type, + unformat_ip4_address, &ip4)) + { + data = vec_new(u8, 4); + memcpy(data, ip4.as_u8, 4); + r = ikev2_set_profile_id(vm, name, (u8) id_type, data, /*local*/ 1); + goto done; + } + else if (unformat (line_input, "set %U id local %U 0x%U", + unformat_token, valid_chars, &name, + unformat_ikev2_id_type, &id_type, + unformat_hex_string, &data)) + { + r = ikev2_set_profile_id(vm, name, (u8) id_type, data, /*local*/ 1); + goto done; + } + else if (unformat (line_input, "set %U id local %U %v", + unformat_token, valid_chars, &name, + unformat_ikev2_id_type, &id_type, &data)) + { + r = ikev2_set_profile_id(vm, name, (u8) id_type, data, /*local*/ 1); + goto done; + } + else if (unformat (line_input, "set %U id remote %U %U", + unformat_token, valid_chars, &name, + unformat_ikev2_id_type, &id_type, + unformat_ip4_address, &ip4)) + { + data = vec_new(u8, 4); + memcpy(data, ip4.as_u8, 4); + r = ikev2_set_profile_id(vm, name, (u8) id_type, data, /*remote*/ 0); + goto done; + } + else if (unformat (line_input, "set %U id remote %U 0x%U", + unformat_token, valid_chars, &name, + unformat_ikev2_id_type, &id_type, + unformat_hex_string, &data)) + { + r = ikev2_set_profile_id(vm, name, (u8) id_type, data, /*remote*/ 0); + goto done; + } + else if (unformat (line_input, "set %U id remote %U %v", + unformat_token, valid_chars, &name, + unformat_ikev2_id_type, &id_type, &data)) + { + r = ikev2_set_profile_id(vm, name, (u8) id_type, data, /*remote*/ 0); + goto done; + } + else if (unformat (line_input, "set %U traffic-selector local " + "ip-range %U - %U port-range %u - %u protocol %u", + unformat_token, valid_chars, &name, + unformat_ip4_address, &ip4, + unformat_ip4_address, &end_addr, + &tmp1, &tmp2, &tmp3)) + { + r = ikev2_set_profile_ts(vm, name, (u8)tmp3, (u16)tmp1, (u16)tmp2, + ip4, end_addr, /*local*/ 1); + goto done; + } + else if (unformat (line_input, "set %U traffic-selector remote " + "ip-range %U - %U port-range %u - %u protocol %u", + unformat_token, valid_chars, &name, + unformat_ip4_address, &ip4, + unformat_ip4_address, &end_addr, + &tmp1, &tmp2, &tmp3)) + { + r = ikev2_set_profile_ts(vm, name, (u8)tmp3, (u16)tmp1, (u16)tmp2, + ip4, end_addr, /*remote*/ 0); + goto done; + } + else + break; + } + + r = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); 
+ +done: + vec_free(name); + vec_free(data); + unformat_free (line_input); + return r; +} + +VLIB_CLI_COMMAND (ikev2_profile_add_del_command, static) = { + .path = "ikev2 profile", + .short_help = + "ikev2 profile [add|del] <id>\n" + "ikev2 profile set <id> auth [rsa-sig|shared-key-mic] [cert-file|string|hex]" + " <data>\n" + "ikev2 profile set <id> id <local|remote> <type> <data>\n" + "ikev2 profile set <id> traffic-selector <local|remote> ip-range " + "<start-addr> - <end-addr> port-range <start-port> - <end-port> " + "protocol <protocol-number>", + .function = ikev2_profile_add_del_command_fn, +}; + +static clib_error_t * +show_ikev2_profile_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ikev2_main_t * km = &ikev2_main; + ikev2_profile_t * p; + + pool_foreach (p, km->profiles, ({ + vlib_cli_output(vm, "profile %v", p->name); + + if (p->auth.data) + { + if (p->auth.hex) + vlib_cli_output(vm, " auth-method %U auth data 0x%U", + format_ikev2_auth_method, p->auth.method, + format_hex_bytes, p->auth.data, vec_len(p->auth.data)); + else + vlib_cli_output(vm, " auth-method %U auth data %v", + format_ikev2_auth_method, p->auth.method, p->auth.data); + } + + if (p->loc_id.data) + { + if (p->loc_id.type == IKEV2_ID_TYPE_ID_IPV4_ADDR) + vlib_cli_output(vm, " local id-type %U data %U", + format_ikev2_id_type, p->loc_id.type, + format_ip4_address, p->loc_id.data); + else if (p->loc_id.type == IKEV2_ID_TYPE_ID_KEY_ID) + vlib_cli_output(vm, " local id-type %U data 0x%U", + format_ikev2_id_type, p->loc_id.type, + format_hex_bytes, p->loc_id.data, + vec_len(p->loc_id.data)); + else + vlib_cli_output(vm, " local id-type %U data %v", + format_ikev2_id_type, p->loc_id.type, p->loc_id.data); + } + + if (p->rem_id.data) + { + if (p->rem_id.type == IKEV2_ID_TYPE_ID_IPV4_ADDR) + vlib_cli_output(vm, " remote id-type %U data %U", + format_ikev2_id_type, p->rem_id.type, + format_ip4_address, p->rem_id.data); + else if (p->rem_id.type == IKEV2_ID_TYPE_ID_KEY_ID) + vlib_cli_output(vm, " remote id-type %U data 0x%U", + format_ikev2_id_type, p->rem_id.type, + format_hex_bytes, p->rem_id.data, + vec_len(p->rem_id.data)); + else + vlib_cli_output(vm, " remote id-type %U data %v", + format_ikev2_id_type, p->rem_id.type, p->rem_id.data); + } + + if (p->loc_ts.end_addr.as_u32) + vlib_cli_output(vm, " local traffic-selector addr %U - %U port %u - %u" + " protocol %u", + format_ip4_address, &p->loc_ts.start_addr, + format_ip4_address, &p->loc_ts.end_addr, + p->loc_ts.start_port, p->loc_ts.end_port, + p->loc_ts.protocol_id); + + if (p->rem_ts.end_addr.as_u32) + vlib_cli_output(vm, " remote traffic-selector addr %U - %U port %u - %u" + " protocol %u", + format_ip4_address, &p->rem_ts.start_addr, + format_ip4_address, &p->rem_ts.end_addr, + p->rem_ts.start_port, p->rem_ts.end_port, + p->rem_ts.protocol_id); + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_ikev2_profile_command, static) = { + .path = "show ikev2 profile", + .short_help = "show ikev2 profile", + .function = show_ikev2_profile_command_fn, +}; + +static clib_error_t * +set_ikev2_local_key_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + clib_error_t * r = 0; + u8 * data = 0; + + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "%v", &data)) + { + r = ikev2_set_local_key(vm, data); + goto done; + } + else + break; + } + + r = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + +done: + vec_free(data); + unformat_free (line_input); + return r; +} + +VLIB_CLI_COMMAND (set_ikev2_local_key_command, static) = { + .path = "set ikev2 local key", + .short_help = + "set ikev2 local key <file>", + .function = set_ikev2_local_key_command_fn, +}; + +clib_error_t * +ikev2_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (ikev2_cli_init); diff --git a/vnet/vnet/ipsec/ikev2_crypto.c b/vnet/vnet/ipsec/ikev2_crypto.c new file mode 100644 index 00000000000..b8dce034e3f --- /dev/null +++ b/vnet/vnet/ipsec/ikev2_crypto.c @@ -0,0 +1,753 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/udp.h> +#include <vnet/ipsec/ikev2.h> +#include <vnet/ipsec/ikev2_priv.h> +#include <openssl/obj_mac.h> +#include <openssl/ec.h> +#include <openssl/x509.h> +#include <openssl/pem.h> +#include <openssl/bn.h> + +/* from RFC7296 */ +static const char modp_dh_768_prime[] = +"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" +"29024E088A67CC74020BBEA63B139B22514A08798E3404DD" +"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" +"E485B576625E7EC6F44C42E9A63A3620FFFFFFFFFFFFFFFF"; +static const char modp_dh_768_generator[] = "02"; + +static const char modp_dh_1024_prime[] = +"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" +"29024E088A67CC74020BBEA63B139B22514A08798E3404DD" +"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" +"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" +"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE65381" +"FFFFFFFFFFFFFFFF"; +static const char modp_dh_1024_generator[] = "02"; + +/* from RFC3526 */ +static const char modp_dh_1536_prime[] = +"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" +"29024E088A67CC74020BBEA63B139B22514A08798E3404DD" +"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" +"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" +"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D" +"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F" +"83655D23DCA3AD961C62F356208552BB9ED529077096966D" +"670C354E4ABC9804F1746C08CA237327FFFFFFFFFFFFFFFF"; +static const char modp_dh_1536_generator[] = "02"; + +static const char modp_dh_2048_prime[] = +"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" +"29024E088A67CC74020BBEA63B139B22514A08798E3404DD" +"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" +"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" +"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D" +"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F" +"83655D23DCA3AD961C62F356208552BB9ED529077096966D" 
+"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B" +"E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9" +"DE2BCBF6955817183995497CEA956AE515D2261898FA0510" +"15728E5A8AACAA68FFFFFFFFFFFFFFFF"; +static const char modp_dh_2048_generator[] = "02"; + +static const char modp_dh_3072_prime[] = +"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" +"29024E088A67CC74020BBEA63B139B22514A08798E3404DD" +"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" +"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" +"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D" +"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F" +"83655D23DCA3AD961C62F356208552BB9ED529077096966D" +"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B" +"E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9" +"DE2BCBF6955817183995497CEA956AE515D2261898FA0510" +"15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64" +"ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7" +"ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B" +"F12FFA06D98A0864D87602733EC86A64521F2B18177B200C" +"BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31" +"43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF"; +static const char modp_dh_3072_generator[] = "02"; + +static const char modp_dh_4096_prime[] = +"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" +"29024E088A67CC74020BBEA63B139B22514A08798E3404DD" +"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" +"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" +"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D" +"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F" +"83655D23DCA3AD961C62F356208552BB9ED529077096966D" +"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B" +"E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9" +"DE2BCBF6955817183995497CEA956AE515D2261898FA0510" +"15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64" +"ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7" +"ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B" +"F12FFA06D98A0864D87602733EC86A64521F2B18177B200C" +"BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31" +"43DB5BFCE0FD108E4B82D120A92108011A723C12A787E6D7" +"88719A10BDBA5B2699C327186AF4E23C1A946834B6150BDA" +"2583E9CA2AD44CE8DBBBC2DB04DE8EF92E8EFC141FBECAA6" +"287C59474E6BC05D99B2964FA090C3A2233BA186515BE7ED" +"1F612970CEE2D7AFB81BDD762170481CD0069127D5B05AA9" +"93B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934063199" +"FFFFFFFFFFFFFFFF"; +static const char modp_dh_4096_generator[] = "02"; + +static const char modp_dh_6144_prime[] = +"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E08" +"8A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B" +"302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9" +"A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE6" +"49286651ECE45B3DC2007CB8A163BF0598DA48361C55D39A69163FA8" +"FD24CF5F83655D23DCA3AD961C62F356208552BB9ED529077096966D" +"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3BE39E772C" +"180E86039B2783A2EC07A28FB5C55DF06F4C52C9DE2BCBF695581718" +"3995497CEA956AE515D2261898FA051015728E5A8AAAC42DAD33170D" +"04507A33A85521ABDF1CBA64ECFB850458DBEF0A8AEA71575D060C7D" +"B3970F85A6E1E4C7ABF5AE8CDB0933D71E8C94E04A25619DCEE3D226" +"1AD2EE6BF12FFA06D98A0864D87602733EC86A64521F2B18177B200C" +"BBE117577A615D6C770988C0BAD946E208E24FA074E5AB3143DB5BFC" +"E0FD108E4B82D120A92108011A723C12A787E6D788719A10BDBA5B26" +"99C327186AF4E23C1A946834B6150BDA2583E9CA2AD44CE8DBBBC2DB" +"04DE8EF92E8EFC141FBECAA6287C59474E6BC05D99B2964FA090C3A2" +"233BA186515BE7ED1F612970CEE2D7AFB81BDD762170481CD0069127" +"D5B05AA993B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934028492" 
+"36C3FAB4D27C7026C1D4DCB2602646DEC9751E763DBA37BDF8FF9406" +"AD9E530EE5DB382F413001AEB06A53ED9027D831179727B0865A8918" +"DA3EDBEBCF9B14ED44CE6CBACED4BB1BDB7F1447E6CC254B33205151" +"2BD7AF426FB8F401378CD2BF5983CA01C64B92ECF032EA15D1721D03" +"F482D7CE6E74FEF6D55E702F46980C82B5A84031900B1C9E59E7C97F" +"BEC7E8F323A97A7E36CC88BE0F1D45B7FF585AC54BD407B22B4154AA" +"CC8F6D7EBF48E1D814CC5ED20F8037E0A79715EEF29BE32806A1D58B" +"B7C5DA76F550AA3D8A1FBFF0EB19CCB1A313D55CDA56C9EC2EF29632" +"387FE8D76E3C0468043E8F663F4860EE12BF2D5B0B7474D6E694F91E" +"6DCC4024FFFFFFFFFFFFFFFF"; +static const char modp_dh_6144_generator[] = "02"; + +static const char modp_dh_8192_prime[] = +"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" +"29024E088A67CC74020BBEA63B139B22514A08798E3404DD" +"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" +"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" +"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D" +"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F" +"83655D23DCA3AD961C62F356208552BB9ED529077096966D" +"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B" +"E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9" +"DE2BCBF6955817183995497CEA956AE515D2261898FA0510" +"15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64" +"ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7" +"ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B" +"F12FFA06D98A0864D87602733EC86A64521F2B18177B200C" +"BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31" +"43DB5BFCE0FD108E4B82D120A92108011A723C12A787E6D7" +"88719A10BDBA5B2699C327186AF4E23C1A946834B6150BDA" +"2583E9CA2AD44CE8DBBBC2DB04DE8EF92E8EFC141FBECAA6" +"287C59474E6BC05D99B2964FA090C3A2233BA186515BE7ED" +"1F612970CEE2D7AFB81BDD762170481CD0069127D5B05AA9" +"93B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934028492" +"36C3FAB4D27C7026C1D4DCB2602646DEC9751E763DBA37BD" +"F8FF9406AD9E530EE5DB382F413001AEB06A53ED9027D831" +"179727B0865A8918DA3EDBEBCF9B14ED44CE6CBACED4BB1B" +"DB7F1447E6CC254B332051512BD7AF426FB8F401378CD2BF" +"5983CA01C64B92ECF032EA15D1721D03F482D7CE6E74FEF6" +"D55E702F46980C82B5A84031900B1C9E59E7C97FBEC7E8F3" +"23A97A7E36CC88BE0F1D45B7FF585AC54BD407B22B4154AA" +"CC8F6D7EBF48E1D814CC5ED20F8037E0A79715EEF29BE328" +"06A1D58BB7C5DA76F550AA3D8A1FBFF0EB19CCB1A313D55C" +"DA56C9EC2EF29632387FE8D76E3C0468043E8F663F4860EE" +"12BF2D5B0B7474D6E694F91E6DBE115974A3926F12FEE5E4" +"38777CB6A932DF8CD8BEC4D073B931BA3BC832B68D9DD300" +"741FA7BF8AFC47ED2576F6936BA424663AAB639C5AE4F568" +"3423B4742BF1C978238F16CBE39D652DE3FDB8BEFC848AD9" +"22222E04A4037C0713EB57A81A23F0C73473FC646CEA306B" +"4BCBC8862F8385DDFA9D4B7FA2C087E879683303ED5BDD3A" +"062B3CF5B3A278A66D2A13F83F44F82DDF310EE074AB6A36" +"4597E899A0255DC164F31CC50846851DF9AB48195DED7EA1" +"B1D510BD7EE74D73FAF36BC31ECFA268359046F4EB879F92" +"4009438B481C6CD7889A002ED5EE382BC9190DA6FC026E47" +"9558E4475677E9AA9E3050E2765694DFC81F56E880B96E71" +"60C980DD98EDD3DFFFFFFFFFFFFFFFFF"; +static const char modp_dh_8192_generator[] = "02"; + +/* from RFC5114 */ +static const char modp_dh_1024_160_prime[] = +"B10B8F96A080E01DDE92DE5EAE5D54EC52C99FBCFB06A3C6" +"9A6A9DCA52D23B616073E28675A23D189838EF1E2EE652C0" +"13ECB4AEA906112324975C3CD49B83BFACCBDD7D90C4BD70" +"98488E9C219A73724EFFD6FAE5644738FAA31A4FF55BCCC0" +"A151AF5F0DC8B4BD45BF37DF365C1A65E68CFDA76D4DA708" +"DF1FB2BC2E4A4371"; +static const char modp_dh_1024_160_generator[] = +"A4D1CBD5C3FD34126765A442EFB99905F8104DD258AC507F" +"D6406CFF14266D31266FEA1E5C41564B777E690F5504F213" +"160217B4B01B886A5E91547F9E2749F4D7FBD7D3B9A92EE1" +"909D0D2263F80A76A6A24C087A091F531DBF0A0169B6A28A" 
+"D662A4D18E73AFA32D779D5918D08BC8858F4DCEF97C2A24" +"855E6EEB22B3B2E5"; + +static const char modp_dh_2048_224_prime[] = +"AD107E1E9123A9D0D660FAA79559C51FA20D64E5683B9FD1" +"B54B1597B61D0A75E6FA141DF95A56DBAF9A3C407BA1DF15" +"EB3D688A309C180E1DE6B85A1274A0A66D3F8152AD6AC212" +"9037C9EDEFDA4DF8D91E8FEF55B7394B7AD5B7D0B6C12207" +"C9F98D11ED34DBF6C6BA0B2C8BBC27BE6A00E0A0B9C49708" +"B3BF8A317091883681286130BC8985DB1602E714415D9330" +"278273C7DE31EFDC7310F7121FD5A07415987D9ADC0A486D" +"CDF93ACC44328387315D75E198C641A480CD86A1B9E587E8" +"BE60E69CC928B2B9C52172E413042E9B23F10B0E16E79763" +"C9B53DCF4BA80A29E3FB73C16B8E75B97EF363E2FFA31F71" +"CF9DE5384E71B81C0AC4DFFE0C10E64F"; +static const char modp_dh_2048_224_generator[] = +"AC4032EF4F2D9AE39DF30B5C8FFDAC506CDEBE7B89998CAF" +"74866A08CFE4FFE3A6824A4E10B9A6F0DD921F01A70C4AFA" +"AB739D7700C29F52C57DB17C620A8652BE5E9001A8D66AD7" +"C17669101999024AF4D027275AC1348BB8A762D0521BC98A" +"E247150422EA1ED409939D54DA7460CDB5F6C6B250717CBE" +"F180EB34118E98D119529A45D6F834566E3025E316A330EF" +"BB77A86F0C1AB15B051AE3D428C8F8ACB70A8137150B8EEB" +"10E183EDD19963DDD9E263E4770589EF6AA21E7F5F2FF381" +"B539CCE3409D13CD566AFBB48D6C019181E1BCFE94B30269" +"EDFE72FE9B6AA4BD7B5A0F1C71CFFF4C19C418E1F6EC0179" +"81BC087F2A7065B384B890D3191F2BFA"; + +static const char modp_dh_2048_256_prime[] = +"87A8E61DB4B6663CFFBBD19C651959998CEEF608660DD0F2" +"5D2CEED4435E3B00E00DF8F1D61957D4FAF7DF4561B2AA30" +"16C3D91134096FAA3BF4296D830E9A7C209E0C6497517ABD" +"5A8A9D306BCF67ED91F9E6725B4758C022E0B1EF4275BF7B" +"6C5BFC11D45F9088B941F54EB1E59BB8BC39A0BF12307F5C" +"4FDB70C581B23F76B63ACAE1CAA6B7902D52526735488A0E" +"F13C6D9A51BFA4AB3AD8347796524D8EF6A167B5A41825D9" +"67E144E5140564251CCACB83E6B486F6B3CA3F7971506026" +"C0B857F689962856DED4010ABD0BE621C3A3960A54E710C3" +"75F26375D7014103A4B54330C198AF126116D2276E11715F" +"693877FAD7EF09CADB094AE91E1A1597"; +static const char modp_dh_2048_256_generator[] = +"3FB32C9B73134D0B2E77506660EDBD484CA7B18F21EF2054" +"07F4793A1A0BA12510DBC15077BE463FFF4FED4AAC0BB555" +"BE3A6C1B0C6B47B1BC3773BF7E8C6F62901228F8C28CBB18" +"A55AE31341000A650196F931C77A57F2DDF463E5E9EC144B" +"777DE62AAAB8A8628AC376D282D6ED3864E67982428EBC83" +"1D14348F6F2F9193B5045AF2767164E1DFC967C1FB3F2E55" +"A4BD1BFFE83B9C80D052B985D182EA0ADB2A3B7313D3FE14" +"C8484B1E052588B9B7D2BBD2DF016199ECD06E1557CD0915" +"B3353BBB64E0EC377FD028370DF92B52C7891428CDC67EB6" +"184B523D1DB246C32F63078490F00EF8D647D148D4795451" +"5E2327CFEF98C582664B4C0F6CC41659"; + +v8 * +ikev2_calc_prf(ikev2_sa_transform_t * tr, v8 * key, v8 * data) +{ + HMAC_CTX ctx; + v8 * prf; + unsigned int len = 0; + + prf = vec_new(u8, tr->key_trunc); + HMAC_CTX_init(&ctx); + HMAC_Init_ex(&ctx, key, vec_len(key), tr->md, NULL); + HMAC_Update(&ctx, data, vec_len(data)); + HMAC_Final(&ctx, prf, &len); + HMAC_CTX_cleanup(&ctx); + + ASSERT(len == tr->key_trunc); + + return prf; +} +u8 * +ikev2_calc_prfplus(ikev2_sa_transform_t * tr, u8 * key, u8 * seed, int len) +{ + v8 * t = 0, * s = 0, * tmp = 0, * ret = 0; + u8 x = 0; + + /* prf+ (K,S) = T1 | T2 | T3 | T4 | ... 
+ + where: + T1 = prf (K, S | 0x01) + T2 = prf (K, T1 | S | 0x02) + T3 = prf (K, T2 | S | 0x03) + T4 = prf (K, T3 | S | 0x04) + */ + + while (vec_len(ret) < len && x < 255) { + if (t) { + vec_append(s, t); + vec_free(t); + } + + vec_append(s, seed); + vec_add2(s, tmp, 1); + *tmp = x + 1; + t = ikev2_calc_prf(tr, key, s); + vec_append(ret, t); + vec_free(s); + x++; + } + + vec_free(t); + + if (x == 255) { + vec_free(ret); + } + + return ret; +} + +v8 * +ikev2_calc_integr(ikev2_sa_transform_t * tr, v8 * key, u8 * data, int len) +{ + v8 * r; + HMAC_CTX hctx; + unsigned int l; + + ASSERT(tr->type == IKEV2_TRANSFORM_TYPE_INTEG); + + r = vec_new(u8, tr->key_len); + + /* compute the integrity check value over the data */ + HMAC_CTX_init(&hctx); + HMAC_Init(&hctx, key, vec_len(key), tr->md); + HMAC_Update(&hctx, (const u8 *) data, len); + HMAC_Final(&hctx, r, &l); + HMAC_CTX_cleanup(&hctx); + + ASSERT(l == tr->key_len); + + return r; +} + +v8 * +ikev2_decrypt_data(ikev2_sa_t * sa, u8 * data, int len) +{ + EVP_CIPHER_CTX ctx; + v8 * r; + int out_len = 0, block_size; + ikev2_sa_transform_t * tr_encr; + + tr_encr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + block_size = tr_encr->block_size; + + /* check that the data length is a multiple of the cipher block size */ + if (len % block_size) { + clib_warning("wrong data length"); + return 0; + } + + EVP_CIPHER_CTX_init(&ctx); + r = vec_new(u8, len - block_size); + EVP_DecryptInit_ex(&ctx, tr_encr->cipher, NULL, sa->sk_ei, data); + EVP_DecryptUpdate(&ctx, r, &out_len, data+block_size, len-block_size); + EVP_DecryptFinal_ex(&ctx, r + out_len, &out_len); + + /* remove padding */ + _vec_len(r) -= r[vec_len(r)-1] + 1; + + EVP_CIPHER_CTX_cleanup(&ctx); + return r; +} + +int +ikev2_encrypt_data(ikev2_sa_t * sa, v8 * src, u8 * dst) +{ + EVP_CIPHER_CTX ctx; + int out_len; + int bs; + ikev2_sa_transform_t * tr_encr; + + tr_encr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + bs = tr_encr->block_size; + + /* generate IV */ + RAND_bytes(dst, bs); + + EVP_CIPHER_CTX_init(&ctx); + + EVP_EncryptInit_ex(&ctx, tr_encr->cipher, NULL, sa->sk_er, dst /* IV at start of dst */ ); + EVP_EncryptUpdate(&ctx, dst + bs, &out_len, src, vec_len(src)); + + EVP_CIPHER_CTX_cleanup(&ctx); + + ASSERT(vec_len(src) == out_len); + + return out_len + bs; +} + +void +ikev2_generate_dh(ikev2_sa_t * sa, ikev2_sa_transform_t * t) +{ + int r; + + if (t->dh_group == IKEV2_DH_GROUP_MODP) + { + DH * dh = DH_new(); + BN_hex2bn(&dh->p, t->dh_p); + BN_hex2bn(&dh->g, t->dh_g); + DH_generate_key(dh); + + sa->r_dh_data = vec_new(u8, t->key_len); + r = BN_bn2bin(dh->pub_key, sa->r_dh_data); + ASSERT(r == t->key_len); + + BIGNUM *ex; + sa->dh_shared_key = vec_new(u8, t->key_len); + ex = BN_bin2bn(sa->i_dh_data, vec_len(sa->i_dh_data) , NULL); + r = DH_compute_key(sa->dh_shared_key, ex, dh); + ASSERT(r == t->key_len); + BN_clear_free(ex); + DH_free(dh); + } + else if (t->dh_group == IKEV2_DH_GROUP_ECP) + { + EC_KEY * ec = EC_KEY_new_by_curve_name(t->nid); + ASSERT(ec); + + EC_KEY_generate_key(ec); + + const EC_POINT * r_point = EC_KEY_get0_public_key(ec); + const EC_GROUP * group = EC_KEY_get0_group(ec); + BIGNUM * x = NULL, * y = NULL; + BN_CTX * bn_ctx = BN_CTX_new(); + u16 x_off, y_off, len; + EC_POINT * i_point = EC_POINT_new(group); + EC_POINT * shared_point = EC_POINT_new(group); + + x = BN_new(); + y = BN_new(); + len = t->key_len / 2; + + EC_POINT_get_affine_coordinates_GFp(group, r_point, x, y, bn_ctx); + sa->r_dh_data = vec_new(u8, t->key_len); + x_off = len - BN_num_bytes(x); + 
memset(sa->r_dh_data, 0, x_off); + BN_bn2bin(x, sa->r_dh_data + x_off); + y_off = t->key_len - BN_num_bytes(y); + memset(sa->r_dh_data + len, 0, y_off - len); + BN_bn2bin(y, sa->r_dh_data + y_off); + + x = BN_bin2bn(sa->i_dh_data, len, x); + y = BN_bin2bn(sa->i_dh_data + len, len, y); + EC_POINT_set_affine_coordinates_GFp(group, i_point, x, y, bn_ctx); + sa->dh_shared_key = vec_new(u8, t->key_len); + EC_POINT_mul(group, shared_point, NULL, i_point, EC_KEY_get0_private_key(ec), NULL); + EC_POINT_get_affine_coordinates_GFp(group, shared_point, x, y, bn_ctx); + x_off = len - BN_num_bytes(x); + memset(sa->dh_shared_key, 0, x_off); + BN_bn2bin(x, sa->dh_shared_key + x_off); + y_off = t->key_len - BN_num_bytes(y); + memset(sa->dh_shared_key + len, 0, y_off - len); + BN_bn2bin(y, sa->dh_shared_key + y_off); + + EC_KEY_free(ec); + BN_free(x); + BN_free(y); + BN_CTX_free(bn_ctx); + EC_POINT_free(i_point); + EC_POINT_free(shared_point); + } +} + +int +ikev2_verify_sign (EVP_PKEY *pkey, u8 * sigbuf, u8 * data) +{ + EVP_MD_CTX md_ctx; + + EVP_VerifyInit(&md_ctx, EVP_sha1()); + EVP_VerifyUpdate(&md_ctx, data, vec_len(data)); + + return EVP_VerifyFinal(&md_ctx, sigbuf, vec_len(sigbuf), pkey); +} + +u8 * +ikev2_calc_sign (EVP_PKEY *pkey, u8 * data) +{ + EVP_MD_CTX md_ctx; + unsigned int sig_len = 0; + u8 * sign; + + EVP_SignInit(&md_ctx, EVP_sha1()); + EVP_SignUpdate(&md_ctx, data, vec_len(data)); + /* get sign len */ + EVP_SignFinal(&md_ctx, NULL, &sig_len, pkey); + sign = vec_new(u8, sig_len); + /* calc sign */ + EVP_SignFinal(&md_ctx, sign, &sig_len, pkey); + + return sign; +} + +EVP_PKEY * +ikev2_load_cert_file (u8 * file) +{ + FILE * fp; + X509 * x509; + EVP_PKEY * pkey = NULL; + + fp = fopen((char *)file, "r"); + if (!fp) + { + clib_warning("open %s failed", file); + goto end; + } + + x509 = PEM_read_X509(fp, NULL, NULL, NULL); + fclose(fp); + if (x509 == NULL) + { + clib_warning("read cert %s failed", file); + goto end; + } + + pkey = X509_get_pubkey(x509); + if (pkey == NULL) + clib_warning("get pubkey %s failed", file); + +end: + return pkey; +} + +EVP_PKEY * +ikev2_load_key_file (u8 * file) +{ + FILE *fp; + EVP_PKEY * pkey = NULL; + + fp = fopen((char *)file, "r"); + if (!fp) + { + clib_warning("open %s failed", file); + goto end; + } + + pkey = PEM_read_PrivateKey(fp, NULL, NULL, NULL); + fclose(fp); + if (pkey == NULL) + clib_warning("read %s failed", file); + +end: + return pkey; +} + +void +ikev2_crypto_init (ikev2_main_t * km) +{ + ikev2_sa_transform_t * tr; + + /* vector of supported transforms - in order of preference */ + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_ENCR; + tr->encr_type = IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC; + tr->key_len = 256/8; + tr->block_size = 128/8; + tr->cipher = EVP_aes_256_cbc(); + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_ENCR; + tr->encr_type = IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC; + tr->key_len = 192/8; + tr->block_size = 128/8; + tr->cipher = EVP_aes_192_cbc(); + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_ENCR; + tr->encr_type = IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC; + tr->key_len = 128/8; + tr->block_size = 128/8; + tr->cipher = EVP_aes_128_cbc(); + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_PRF; + tr->prf_type = IKEV2_TRANSFORM_PRF_TYPE_PRF_HMAC_SHA1; + tr->key_len = 160/8; + tr->key_trunc = 160/8; + tr->md = EVP_sha1(); + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_INTEG; + tr->integ_type 
= IKEV2_TRANSFORM_INTEG_TYPE_AUTH_HMAC_SHA1_96; + tr->key_len = 160/8; + tr->key_trunc = 96/8; + tr->md = EVP_sha1(); + +#if defined(OPENSSL_NO_CISCO_FECDH) + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_BRAINPOOL_512; + tr->key_len = (512 * 2)/8; + tr->nid = NID_brainpoolP512r1; + tr->dh_group = IKEV2_DH_GROUP_ECP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_BRAINPOOL_384; + tr->key_len = (384 * 2)/8; + tr->nid = NID_brainpoolP384r1; + tr->dh_group = IKEV2_DH_GROUP_ECP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_BRAINPOOL_256; + tr->key_len = (256 * 2)/8; + tr->nid = NID_brainpoolP256r1; + tr->dh_group = IKEV2_DH_GROUP_ECP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_BRAINPOOL_224; + tr->key_len = (224 * 2)/8; + tr->nid = NID_brainpoolP224r1; + tr->dh_group = IKEV2_DH_GROUP_ECP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_224; + tr->key_len = (224 * 2)/8; + tr->nid = NID_secp224r1; + tr->dh_group = IKEV2_DH_GROUP_ECP; +#endif + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_521; + tr->key_len = (528 * 2)/8; + tr->nid = NID_secp521r1; + tr->dh_group = IKEV2_DH_GROUP_ECP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_384; + tr->key_len = (384 * 2)/8; + tr->nid = NID_secp384r1; + tr->dh_group = IKEV2_DH_GROUP_ECP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_256; + tr->key_len = (256 * 2)/8; + tr->nid = NID_X9_62_prime256v1; + tr->dh_group = IKEV2_DH_GROUP_ECP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_192; + tr->key_len = (192 * 2)/8; + tr->nid = NID_X9_62_prime192v1; + tr->dh_group = IKEV2_DH_GROUP_ECP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_2048_256; + tr->key_len = 2048/8; + tr->dh_p = (const char *) &modp_dh_2048_256_prime; + tr->dh_g = (const char *) &modp_dh_2048_256_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_2048_224; + tr->key_len = 2048/8; + tr->dh_p = (const char *) &modp_dh_2048_224_prime; + tr->dh_g = (const char *) &modp_dh_2048_224_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_1024_160; + tr->key_len = 1024/8; + tr->dh_p = (const char *) &modp_dh_1024_160_prime; + tr->dh_g = (const char *) &modp_dh_1024_160_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_8192; + tr->key_len = 8192/8; + tr->dh_p = (const char *) &modp_dh_8192_prime; + tr->dh_g = (const char *) &modp_dh_8192_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + 
tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_6144; + tr->key_len = 6144/8; + tr->dh_p = (const char *) &modp_dh_6144_prime; + tr->dh_g = (const char *) &modp_dh_6144_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_4096; + tr->key_len = 4096/8; + tr->dh_p = (const char *) &modp_dh_4096_prime; + tr->dh_g = (const char *) &modp_dh_4096_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_3072; + tr->key_len = 3072/8; + tr->dh_p = (const char *) &modp_dh_3072_prime; + tr->dh_g = (const char *) &modp_dh_3072_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_2048; + tr->key_len = 2048/8; + tr->dh_p = (const char *) &modp_dh_2048_prime; + tr->dh_g = (const char *) &modp_dh_2048_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_1536; + tr->key_len = 1536/8; + tr->dh_p = (const char *) &modp_dh_1536_prime; + tr->dh_g = (const char *) &modp_dh_1536_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_1024; + tr->key_len = 1024/8; + tr->dh_p = (const char *) &modp_dh_1024_prime; + tr->dh_g = (const char *) &modp_dh_1024_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_DH; + tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_768; + tr->key_len = 768/8; + tr->dh_p = (const char *) &modp_dh_768_prime; + tr->dh_g = (const char *) &modp_dh_768_generator; + tr->dh_group = IKEV2_DH_GROUP_MODP; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_ESN; + tr->esn_type = IKEV2_TRANSFORM_ESN_TYPE_ESN; + + vec_add2(km->supported_transforms, tr, 1); + tr->type = IKEV2_TRANSFORM_TYPE_ESN; + tr->esn_type = IKEV2_TRANSFORM_ESN_TYPE_NO_ESN; +} + + diff --git a/vnet/vnet/ipsec/ikev2_format.c b/vnet/vnet/ipsec/ikev2_format.c new file mode 100644 index 00000000000..b5f047f3f79 --- /dev/null +++ b/vnet/vnet/ipsec/ikev2_format.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> +#include <vnet/interface.h> + +#include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/ikev2.h> +#include <vnet/ipsec/ikev2_priv.h> + +u8 * format_ikev2_sa_transform(u8 * s, va_list * args) +{ + ikev2_sa_transform_t * tr = va_arg (*args, ikev2_sa_transform_t *); + + if (!tr) + return s; + + if (tr->type >= IKEV2_TRANSFORM_NUM_TYPES) + return s; + + s = format(s,"%U:", format_ikev2_transform_type, tr->type); + + switch (tr->type) + { + case IKEV2_TRANSFORM_TYPE_ENCR: + s = format(s, "%U", format_ikev2_transform_encr_type, tr->encr_type); + break; + case IKEV2_TRANSFORM_TYPE_PRF: + s = format(s, "%U", format_ikev2_transform_prf_type, tr->prf_type); + break; + case IKEV2_TRANSFORM_TYPE_INTEG: + s = format(s, "%U", format_ikev2_transform_integ_type, tr->integ_type); + break; + case IKEV2_TRANSFORM_TYPE_DH: + s = format(s, "%U", format_ikev2_transform_dh_type, tr->dh_type); + break; + case IKEV2_TRANSFORM_TYPE_ESN: + s = format(s, "%U", format_ikev2_transform_esn_type, tr->esn_type); + break; + default: + break; + } + + if (tr->type == IKEV2_TRANSFORM_TYPE_ENCR && + tr->encr_type == IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC && tr->key_len) + s = format(s, "-%u", tr->key_len * 8); + else if (vec_len(tr->attrs) == 4 && tr->attrs[0] == 0x80 && tr->attrs[1] == 0x0e) + s = format(s, "-%u", tr->attrs[2] * 256 + tr->attrs[3]); + else if (vec_len(tr->attrs)) + s = format(s, "(unknown attr %U)", format_hex_bytes, + tr->attrs, vec_len(tr->attrs)); + + return s; +} + +#define MACRO_FORMAT(lc) \ +u8 * format_ikev2_##lc (u8 * s, va_list * args) \ +{ \ + u32 i = va_arg (*args, u32); \ + char * t = 0; \ + switch (i) { \ + foreach_ikev2_##lc \ + default: \ + return format (s, "unknown (%u)", i); \ + } \ + s = format (s, "%s", t); \ + return s; \ +} + +#define MACRO_UNFORMAT(lc) \ +uword \ +unformat_ikev2_##lc (unformat_input_t * input, \ + va_list * args) \ +{ \ + u32 * r = va_arg (*args, u32 *); \ + if (0) ; \ + foreach_ikev2_##lc \ + else \ + return 0; \ + return 1; \ +} + +#define _(v,f,str) case IKEV2_AUTH_METHOD_##f: t = str; break; +MACRO_FORMAT(auth_method) +#undef _ +#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_AUTH_METHOD_##f; +MACRO_UNFORMAT(auth_method) +#undef _ + +#define _(v,f,str) case IKEV2_TRANSFORM_TYPE_##f: t = str; break; +MACRO_FORMAT(transform_type) +#undef _ +#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_TYPE_##f; +MACRO_UNFORMAT(transform_type) +#undef _ + +#define _(v,f) case IKEV2_NOTIFY_MSG_##f: t = #f; break; +MACRO_FORMAT(notify_msg_type) +#undef _ + +#define _(v,f,str) case IKEV2_ID_TYPE_##f: t = str; break; +MACRO_FORMAT(id_type) +#undef _ +#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_ID_TYPE_##f; +MACRO_UNFORMAT(id_type) +#undef _ + +#define _(v,f,str) case IKEV2_TRANSFORM_ENCR_TYPE_##f: t = str; break; +MACRO_FORMAT(transform_encr_type) +#undef _ +#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_ENCR_TYPE_##f; +MACRO_UNFORMAT(transform_encr_type) +#undef _ + +#define _(v,f,str) case IKEV2_TRANSFORM_PRF_TYPE_##f: t = str; break; +MACRO_FORMAT(transform_prf_type) +#undef _ +#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_PRF_TYPE_##f; +MACRO_UNFORMAT(transform_prf_type) +#undef _ + +#define _(v,f,str) case IKEV2_TRANSFORM_INTEG_TYPE_##f: t = str; break; +MACRO_FORMAT(transform_integ_type) +#undef _ +#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_INTEG_TYPE_##f; 
+MACRO_UNFORMAT(transform_integ_type) +#undef _ + +#define _(v,f,str) case IKEV2_TRANSFORM_DH_TYPE_##f: t = str; break; +MACRO_FORMAT(transform_dh_type) +#undef _ +#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_DH_TYPE_##f; +MACRO_UNFORMAT(transform_dh_type) +#undef _ + +#define _(v,f,str) case IKEV2_TRANSFORM_ESN_TYPE_##f: t = str; break; +MACRO_FORMAT(transform_esn_type) +#undef _ +#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_ESN_TYPE_##f; +MACRO_UNFORMAT(transform_esn_type) +#undef _ + diff --git a/vnet/vnet/ipsec/ikev2_payload.c b/vnet/vnet/ipsec/ikev2_payload.c new file mode 100644 index 00000000000..f523fa81cba --- /dev/null +++ b/vnet/vnet/ipsec/ikev2_payload.c @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> +#include <vnet/interface.h> + +#include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/ikev2.h> +#include <vnet/ipsec/ikev2_priv.h> + +typedef CLIB_PACKED (struct { + u8 nextpayload; + u8 flags; + u16 length; + u8 protocol_id; + u8 spi_size; + u16 msg_type; + u8 payload[0]; +}) ike_notify_payload_header_t; + +typedef CLIB_PACKED (struct { + u8 ts_type; + u8 protocol_id; + u16 selector_len; + u16 start_port; + u16 end_port; + ip4_address_t start_addr; + ip4_address_t end_addr; +}) ikev2_ts_payload_entry_t; + +typedef CLIB_PACKED (struct { + u8 nextpayload; + u8 flags; + u16 length; + u8 num_ts; + u8 reserved[3]; + ikev2_ts_payload_entry_t ts[0]; +}) ike_ts_payload_header_t; + +typedef CLIB_PACKED (struct { + u8 last_or_more; + u8 reserved; + u16 proposal_len; + u8 proposal_num; + u8 protocol_id; + u8 spi_size; + u8 num_transforms; + u32 spi[0]; +}) ike_sa_proposal_data_t; + +typedef CLIB_PACKED (struct { + u8 last_or_more; + u8 reserved; + u16 transform_len; + u8 transform_type; + u8 reserved2; + u16 transform_id; + u8 attributes[0]; +}) ike_sa_transform_data_t; + +typedef CLIB_PACKED (struct { + u8 nextpayload; + u8 flags; + u16 length; + u8 protocol_id; + u8 spi_size; + u16 num_of_spi; + u32 spi[0]; +}) ike_delete_payload_header_t; + +static ike_payload_header_t * +ikev2_payload_add_hdr(ikev2_payload_chain_t * c, u8 payload_type, int len) +{ + ike_payload_header_t * hdr = (ike_payload_header_t *) &c->data[c->last_hdr_off]; + u8 * tmp; + + if (c->data) + hdr->nextpayload = payload_type; + else + c->first_payload_type = payload_type; + + c->last_hdr_off = vec_len(c->data); + vec_add2(c->data, tmp, len); + hdr = (ike_payload_header_t *) tmp; + memset(hdr, 0, len); + + hdr->length = clib_host_to_net_u16(len); + + return hdr; +} + +static void +ikev2_payload_add_data(ikev2_payload_chain_t * c, u8 * data) +{ + u16 len; + ike_payload_header_t * hdr; + + vec_append(c->data, data); + hdr = (ike_payload_header_t *) &c->data[c->last_hdr_off]; + len = clib_net_to_host_u16(hdr->length); + hdr->length = clib_host_to_net_u16(len + vec_len(data)); +} + +void 
+ikev2_payload_add_notify(ikev2_payload_chain_t * c, u16 msg_type, u8 * data) +{ + ike_notify_payload_header_t * n; + + n = (ike_notify_payload_header_t *) ikev2_payload_add_hdr(c, IKEV2_PAYLOAD_NOTIFY, sizeof (*n)); + n->msg_type = clib_host_to_net_u16(msg_type); + ikev2_payload_add_data(c, data); +} + +void +ikev2_payload_add_sa(ikev2_payload_chain_t * c, ikev2_sa_proposal_t * proposals) +{ + ike_payload_header_t * ph; + ike_sa_proposal_data_t * prop; + ike_sa_transform_data_t * tr; + ikev2_sa_proposal_t * p; + ikev2_sa_transform_t * t; + + u8 * tmp; + u8 * pr_data = 0; + u8 * tr_data = 0; + + ikev2_payload_add_hdr(c, IKEV2_PAYLOAD_SA, sizeof (*ph)); + + vec_foreach(p, proposals) + { + int spi_size = (p->protocol_id == IKEV2_PROTOCOL_ESP) ? 4 : 0; + pr_data = vec_new(u8, sizeof(ike_sa_proposal_data_t) + spi_size); + prop = (ike_sa_proposal_data_t *) pr_data; + prop->last_or_more = proposals - p + 1 < vec_len(proposals) ? 2 : 0; + prop->protocol_id = p->protocol_id; + prop->proposal_num = p->proposal_num; + prop->spi_size = spi_size; + prop->num_transforms = vec_len(p->transforms); + + if (spi_size) + prop->spi[0] = clib_host_to_net_u32(p->spi); + + DBG_PLD("proposal num %u protocol_id %u last_or_more %u spi_size %u%s%U", + prop->proposal_num, prop->protocol_id, prop->last_or_more, + prop->spi_size, prop->spi_size ? " spi_data " : "", + format_hex_bytes, prop->spi, prop->spi_size); + + vec_foreach(t, p->transforms) + { + vec_add2(tr_data, tmp, sizeof(*tr) + vec_len(t->attrs)); + tr = (ike_sa_transform_data_t *) tmp; + tr->last_or_more = ((t - p->transforms) + 1 < vec_len(p->transforms)) ? 3 : 0; + tr->transform_type = t->type; + tr->transform_id = clib_host_to_net_u16(t->transform_id); + tr->transform_len = clib_host_to_net_u16(sizeof(*tr) + vec_len(t->attrs)); + + if (vec_len(t->attrs) > 0) + memcpy(tr->attributes, t->attrs, vec_len(t->attrs)); + + DBG_PLD("transform type %U transform_id %u last_or_more %u attr_size %u%s%U", + format_ikev2_transform_type, tr->transform_type, + t->transform_id, tr->last_or_more, vec_len(t->attrs), + vec_len(t->attrs) ? 
" attrs " : "", + format_hex_bytes, tr->attributes, vec_len(t->attrs)); + } + + prop->proposal_len = clib_host_to_net_u16(vec_len(tr_data) + vec_len(pr_data)); + ikev2_payload_add_data(c, pr_data); + ikev2_payload_add_data(c, tr_data); + vec_free(pr_data); + vec_free(tr_data); + } +} + +void +ikev2_payload_add_ke(ikev2_payload_chain_t * c, u16 dh_group, u8 * dh_data) +{ + ike_ke_payload_header_t * ke; + ke = (ike_ke_payload_header_t *) ikev2_payload_add_hdr(c, IKEV2_PAYLOAD_KE, + sizeof (*ke)); + + ke->dh_group = clib_host_to_net_u16(dh_group); + ikev2_payload_add_data(c, dh_data); +} + +void +ikev2_payload_add_nonce(ikev2_payload_chain_t * c, u8 * nonce) +{ + ikev2_payload_add_hdr(c, IKEV2_PAYLOAD_NONCE, sizeof (ike_payload_header_t)); + ikev2_payload_add_data(c, nonce); +} + +void +ikev2_payload_add_id(ikev2_payload_chain_t *c, ikev2_id_t * id, u8 type) +{ + ike_id_payload_header_t * idp; + idp = (ike_id_payload_header_t *) ikev2_payload_add_hdr(c, type, sizeof (*idp)); + + idp->id_type = id->type; + ikev2_payload_add_data(c, id->data); +} + +void +ikev2_payload_add_delete(ikev2_payload_chain_t *c, ikev2_delete_t * d) +{ + ike_delete_payload_header_t * dp; + u16 num_of_spi = vec_len(d); + ikev2_delete_t * d2; + dp = (ike_delete_payload_header_t *) ikev2_payload_add_hdr(c, IKEV2_PAYLOAD_DELETE, + sizeof (*dp)); + + if (d[0].protocol_id == IKEV2_PROTOCOL_IKE) + { + dp->protocol_id = 1; + } + else + { + dp->protocol_id = d[0].protocol_id; + dp->spi_size = 4; + dp->num_of_spi = clib_host_to_net_u16(num_of_spi); + vec_foreach(d2, d) + { + u8 * data = vec_new(u8, 4); + u32 spi = clib_host_to_net_u32(d2->spi); + memcpy(data, &spi, 4); + ikev2_payload_add_data(c, data); + vec_free(data); + } + } +} + +void +ikev2_payload_add_auth(ikev2_payload_chain_t *c, ikev2_auth_t * auth) +{ + ike_auth_payload_header_t * ap; + ap = (ike_auth_payload_header_t *) ikev2_payload_add_hdr(c, IKEV2_PAYLOAD_AUTH, + sizeof (*ap)); + + ap->auth_method = auth->method; + ikev2_payload_add_data(c, auth->data); +} + +void +ikev2_payload_add_ts(ikev2_payload_chain_t * c, ikev2_ts_t * ts, u8 type) +{ + ike_ts_payload_header_t * tsh; + ikev2_ts_t *ts2; + u8 * data = 0, * tmp; + + tsh = (ike_ts_payload_header_t *) ikev2_payload_add_hdr(c, type, sizeof (*tsh)); + tsh->num_ts = vec_len(ts); + + vec_foreach(ts2, ts) + { + ASSERT(ts2->ts_type == 7); /*TS_IPV4_ADDR_RANGE */ + ikev2_ts_payload_entry_t * entry; + vec_add2(data, tmp, sizeof(*entry)); + entry = (ikev2_ts_payload_entry_t *) tmp; + entry->ts_type = ts2->ts_type; + entry->protocol_id = ts2->protocol_id; + entry->selector_len = clib_host_to_net_u16(16); + entry->start_port = clib_host_to_net_u16(ts2->start_port); + entry->end_port = clib_host_to_net_u16(ts2->end_port); + entry->start_addr.as_u32 = ts2->start_addr.as_u32; + entry->end_addr.as_u32 = ts2->end_addr.as_u32; + } + + ikev2_payload_add_data(c, data); + vec_free(data); +} + +void +ikev2_payload_chain_add_padding(ikev2_payload_chain_t * c, int bs) +{ + u8 * tmp __attribute__((unused)); + u8 pad_len = (vec_len(c->data) / bs + 1) * bs - vec_len(c->data); + vec_add2(c->data, tmp, pad_len); + c->data[vec_len(c->data)-1] = pad_len - 1; +} + +ikev2_sa_proposal_t * +ikev2_parse_sa_payload(ike_payload_header_t * ikep) +{ + ikev2_sa_proposal_t * v = 0; + ikev2_sa_proposal_t * proposal; + ikev2_sa_transform_t * transform; + + u32 plen = clib_net_to_host_u16(ikep->length); + + ike_sa_proposal_data_t * sap; + int proposal_ptr = 0; + + do + { + sap = (ike_sa_proposal_data_t *) &ikep->payload[proposal_ptr]; + int i; + int 
transform_ptr; + + DBG_PLD("proposal num %u len %u last_or_more %u id %u " + "spi_size %u num_transforms %u", + sap->proposal_num, clib_net_to_host_u16(sap->proposal_len), + sap->last_or_more, sap->protocol_id, sap->spi_size, + sap->num_transforms); + + /* IKE proposal should not have SPI */ + if (sap->protocol_id == IKEV2_PROTOCOL_IKE && sap->spi_size != 0) + goto data_corrupted; + + /* ESP proposal must carry a 4 byte SPI */ + if (sap->protocol_id == IKEV2_PROTOCOL_ESP && sap->spi_size != 4) + goto data_corrupted; + + transform_ptr = proposal_ptr + sizeof(*sap) + sap->spi_size; + + vec_add2(v, proposal, 1); + proposal->proposal_num = sap->proposal_num; + proposal->protocol_id = sap->protocol_id; + + if (sap->spi_size == 4) { + proposal->spi = clib_net_to_host_u32(sap->spi[0]); + } + + for(i=0; i< sap->num_transforms; i++) + { + ike_sa_transform_data_t * tr = (ike_sa_transform_data_t *) &ikep->payload[transform_ptr]; + u16 tlen = clib_net_to_host_u16(tr->transform_len); + + if (tlen < sizeof(*tr)) + goto data_corrupted; + + vec_add2(proposal->transforms, transform, 1); + + transform->type = tr->transform_type; + transform->transform_id = clib_net_to_host_u16(tr->transform_id); + if (tlen > sizeof(*tr)) + vec_add(transform->attrs, tr->attributes, tlen - sizeof(*tr)); + + DBG_PLD("transform num %u len %u last_or_more %u type %U id %u%s%U", + i, tlen, tr->last_or_more, + format_ikev2_sa_transform, transform, + clib_net_to_host_u16(tr->transform_id), + tlen > sizeof(*tr) ? " attrs " : "", + format_hex_bytes, tr->attributes, tlen - sizeof (*tr)); + + transform_ptr += tlen; + } + + proposal_ptr += clib_net_to_host_u16(sap->proposal_len); + } + while (proposal_ptr < (plen - sizeof(*ikep)) && sap->last_or_more == 2); + + /* data validation */ + if (proposal_ptr != (plen - sizeof(*ikep)) || sap->last_or_more) + goto data_corrupted; + + return v; + +data_corrupted: + DBG_PLD("SA payload data corrupted"); + ikev2_sa_free_proposal_vector(&v); + return 0; +} + +ikev2_ts_t * +ikev2_parse_ts_payload(ike_payload_header_t * ikep) +{ + ike_ts_payload_header_t * tsp = (ike_ts_payload_header_t *) ikep; + ikev2_ts_t * r = 0, *ts; + u8 i; + + for (i = 0; i < tsp->num_ts; i++) + { + if (tsp->ts[i].ts_type != 7) /* TS_IPV4_ADDR_RANGE */ + { + DBG_PLD("unsupported TS type received (%u)", tsp->ts[i].ts_type); + continue; + } + + vec_add2(r, ts, 1); + ts->ts_type = tsp->ts[i].ts_type; + ts->protocol_id = tsp->ts[i].protocol_id; + ts->start_port = tsp->ts[i].start_port; + ts->end_port = tsp->ts[i].end_port; + ts->start_addr.as_u32 = tsp->ts[i].start_addr.as_u32; + ts->end_addr.as_u32 = tsp->ts[i].end_addr.as_u32; + } + return r; +} + +ikev2_notify_t * +ikev2_parse_notify_payload(ike_payload_header_t * ikep) +{ + ike_notify_payload_header_t * n = (ike_notify_payload_header_t *) ikep; + u32 plen = clib_net_to_host_u16(ikep->length); + ikev2_notify_t * r = 0; + u32 spi; + + DBG_PLD("msg_type %U len %u%s%U", + format_ikev2_notify_msg_type, clib_net_to_host_u16(n->msg_type), + plen, plen > sizeof(*n) ? 
" data ":"", + format_hex_bytes, n->payload, plen - sizeof(*n)); + + r = vec_new(ikev2_notify_t, 1); + r->msg_type = clib_net_to_host_u16(n->msg_type); + r->protocol_id = n->protocol_id; + + if (n->spi_size == 4) + { + memcpy(&spi, n->payload, n->spi_size); + r->spi = clib_net_to_host_u32(spi); + DBG_PLD("spi %lx", r->spi); + } + else if (n->spi_size == 0) + { + r->spi = 0; + } + else + { + clib_warning("invalid SPI Size %d", n->spi_size); + } + + if (plen > (sizeof(*n) + n->spi_size)) + { + vec_add(r->data, n->payload + n->spi_size, plen - sizeof(*n) - n->spi_size); + } + + return r; +} + +void +ikev2_parse_vendor_payload(ike_payload_header_t * ikep) +{ + u32 plen = clib_net_to_host_u16(ikep->length); + int i; + int is_string = 1; + + for(i=0; i < plen - 4; i++) + if (!isprint(ikep->payload[i])) + is_string = 0; + + DBG_PLD("len %u data %s:%U", + plen, + is_string ? "string":"hex", + is_string ? format_ascii_bytes : format_hex_bytes, + ikep->payload, plen - sizeof(*ikep)); +} + +ikev2_delete_t * +ikev2_parse_delete_payload(ike_payload_header_t * ikep) +{ + ike_delete_payload_header_t * d = (ike_delete_payload_header_t *) ikep; + u32 plen = clib_net_to_host_u16(ikep->length); + ikev2_delete_t * r = 0, * del; + u16 num_of_spi = clib_net_to_host_u16(d->num_of_spi); + u16 i = 0; + + DBG_PLD("protocol_id %u spi_size %u num_of_spi %u len %u%s%U", + d->protocol_id, d->spi_size, num_of_spi, + plen, plen > sizeof(d) ? " data ":"", + format_hex_bytes, d->spi, plen - sizeof(*d)); + + if (d->protocol_id == IKEV2_PROTOCOL_IKE) + { + r = vec_new(ikev2_delete_t, 1); + r->protocol_id = 1; + } + else + { + r = vec_new(ikev2_delete_t, num_of_spi); + vec_foreach(del, r) + { + del->protocol_id = d->protocol_id; + del->spi = clib_net_to_host_u32(d->spi[i++]); + } + } + + return r; +} diff --git a/vnet/vnet/ipsec/ikev2_priv.h b/vnet/vnet/ipsec/ikev2_priv.h new file mode 100644 index 00000000000..4f05a60a621 --- /dev/null +++ b/vnet/vnet/ipsec/ikev2_priv.h @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ikev2_priv_h__ +#define __included_ikev2_priv_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +#include <vnet/ipsec/ikev2.h> + +#include <vppinfra/hash.h> +#include <vppinfra/elog.h> +#include <vppinfra/error.h> + +#include <openssl/rand.h> +#include <openssl/dh.h> +#include <openssl/hmac.h> +#include <openssl/evp.h> + +#define IKEV2_DEBUG_PAYLOAD 1 + +#if IKEV2_DEBUG_PAYLOAD == 1 +#define DBG_PLD(my_args...) clib_warning(my_args) +#else +#define DBG_PLD(my_args...) 
+#endif + +typedef enum { + IKEV2_STATE_UNKNOWN, + IKEV2_STATE_SA_INIT, + IKEV2_STATE_DELETED, + IKEV2_STATE_AUTH_FAILED, + IKEV2_STATE_AUTHENTICATED, + IKEV2_STATE_NOTIFY_AND_DELETE, + IKEV2_STATE_TS_UNACCEPTABLE, + IKEV2_STATE_NO_PROPOSAL_CHOSEN, +} ikev2_state_t; + +typedef struct { + ikev2_auth_method_t method:8; + u8 * data; + u8 hex; /* hex encoding of the shared secret */ + EVP_PKEY * key; +} ikev2_auth_t; + +typedef enum { + IKEV2_DH_GROUP_MODP = 0, + IKEV2_DH_GROUP_ECP = 1, +} ikev2_dh_group_t; + +typedef struct { + ikev2_transform_type_t type; + union { + u16 transform_id; + ikev2_transform_encr_type_t encr_type:16; + ikev2_transform_prf_type_t prf_type:16; + ikev2_transform_integ_type_t integ_type:16; + ikev2_transform_dh_type_t dh_type:16; + ikev2_transform_esn_type_t esn_type:16; + }; + u8 * attrs; + u16 key_len; + u16 key_trunc; + u16 block_size; + u8 dh_group; + int nid; + const char * dh_p; + const char * dh_g; + const void * md; + const void * cipher; +} ikev2_sa_transform_t; + +typedef struct { + u8 proposal_num; + ikev2_protocol_id_t protocol_id:8; + u32 spi; + ikev2_sa_transform_t * transforms; +} ikev2_sa_proposal_t; + +typedef struct { + u8 ts_type; + u8 protocol_id; + u16 selector_len; + u16 start_port; + u16 end_port; + ip4_address_t start_addr; + ip4_address_t end_addr; +} ikev2_ts_t; + +typedef struct { + ikev2_id_type_t type:8; + u8 * data; +} ikev2_id_t; + +typedef struct { + /* sa proposals vectors */ + ikev2_sa_proposal_t * i_proposals; + ikev2_sa_proposal_t * r_proposals; + + /* Traffic Selectors */ + ikev2_ts_t * tsi; + ikev2_ts_t * tsr; + + /* keys */ + u8 * sk_ai; + u8 * sk_ar; + u8 * sk_ei; + u8 * sk_er; +} ikev2_child_sa_t; + +typedef struct { + u8 protocol_id; + u32 spi; /*for ESP and AH SPI size is 4, for IKE size is 0 */ +} ikev2_delete_t; + +typedef struct { + u8 protocol_id; + u32 spi; + ikev2_sa_proposal_t * i_proposal; + ikev2_sa_proposal_t * r_proposal; + ikev2_ts_t * tsi; + ikev2_ts_t * tsr; +} ikev2_rekey_t; + +typedef struct { + u16 msg_type; + u8 protocol_id; + u32 spi; + u8 * data; +} ikev2_notify_t; + + +typedef struct { + ikev2_state_t state; + u8 unsupported_cp; + u8 initial_contact; + ip4_address_t iaddr; + ip4_address_t raddr; + u64 ispi; + u64 rspi; + u8 * i_nonce; + u8 * r_nonce; + + /* DH data */ + u16 dh_group; + u8 * dh_shared_key; + u8 * i_dh_data; + u8 * r_dh_data; + + /* sa proposals vectors */ + ikev2_sa_proposal_t * i_proposals; + ikev2_sa_proposal_t * r_proposals; + + /* keys */ + u8 * sk_d; + u8 * sk_ai; + u8 * sk_ar; + u8 * sk_ei; + u8 * sk_er; + u8 * sk_pi; + u8 * sk_pr; + + /* auth */ + ikev2_auth_t i_auth; + ikev2_auth_t r_auth; + + /* ID */ + ikev2_id_t i_id; + ikev2_id_t r_id; + + /* pending deletes */ + ikev2_delete_t * del; + + /* pending rekeyings */ + ikev2_rekey_t * rekey; + + /* packet data */ + u8 * last_sa_init_req_packet_data; + u8 * last_sa_init_res_packet_data; + + /* retransmit */ + u32 last_msg_id; + u8 * last_res_packet_data; + + ikev2_child_sa_t * childs; +} ikev2_sa_t; + +typedef struct { + u8 * name; + u8 is_enabled; + + ikev2_auth_t auth; + ikev2_id_t loc_id; + ikev2_id_t rem_id; + ikev2_ts_t loc_ts; + ikev2_ts_t rem_ts; +} ikev2_profile_t; + +typedef struct { + /* pool of IKEv2 Security Associations */ + ikev2_sa_t * sas; + + /* pool of IKEv2 profiles */ + ikev2_profile_t * profiles; + + /* vector of supported transform types */ + ikev2_sa_transform_t * supported_transforms; + + /* hashes */ + uword * sa_by_rspi; + mhash_t profile_index_by_name; + + /* local private key */ + EVP_PKEY * pkey; + + /* 
convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} ikev2_main_t; + +ikev2_main_t ikev2_main; + +void ikev2_sa_free_proposal_vector(ikev2_sa_proposal_t ** v); +ikev2_sa_transform_t * ikev2_sa_get_td_for_type(ikev2_sa_proposal_t * p, + ikev2_transform_type_t type); + +/* ikev2_crypto.c */ +v8 * ikev2_calc_prf(ikev2_sa_transform_t * tr, v8 * key, v8 * data); +u8 * ikev2_calc_prfplus(ikev2_sa_transform_t * tr, u8 * key, u8 * seed, int len); +v8 * ikev2_calc_integr(ikev2_sa_transform_t * tr, v8 * key, u8 * data, int len); +v8 * ikev2_decrypt_data(ikev2_sa_t * sa, u8 * data, int len); +int ikev2_encrypt_data(ikev2_sa_t * sa, v8 * src, u8 * dst); +void ikev2_generate_dh(ikev2_sa_t * sa, ikev2_sa_transform_t * t); +int ikev2_verify_sign(EVP_PKEY *pkey, u8 * sigbuf, u8 * data); +u8 * ikev2_calc_sign(EVP_PKEY *pkey, u8 * data); +EVP_PKEY * ikev2_load_cert_file(u8 * file); +EVP_PKEY * ikev2_load_key_file(u8 * file); +void ikev2_crypto_init (ikev2_main_t * km); + +/* ikev2_payload.c */ +typedef struct { + u8 first_payload_type; + u16 last_hdr_off; + u8 * data; +} ikev2_payload_chain_t; + +#define ikev2_payload_new_chain(V) vec_validate (V, 0) +#define ikev2_payload_destroy_chain(V) do { \ + vec_free((V)->data); \ + vec_free(V); \ +} while (0) + +void ikev2_payload_add_notify(ikev2_payload_chain_t * c, u16 msg_type, u8 * data); +void ikev2_payload_add_sa(ikev2_payload_chain_t * c, ikev2_sa_proposal_t * proposals); +void ikev2_payload_add_ke(ikev2_payload_chain_t * c, u16 dh_group, u8 * dh_data); +void ikev2_payload_add_nonce(ikev2_payload_chain_t * c, u8 * nonce); +void ikev2_payload_add_id(ikev2_payload_chain_t *c, ikev2_id_t * id, u8 type); +void ikev2_payload_add_auth(ikev2_payload_chain_t *c, ikev2_auth_t * auth); +void ikev2_payload_add_ts(ikev2_payload_chain_t * c, ikev2_ts_t * ts, u8 type); +void ikev2_payload_add_delete(ikev2_payload_chain_t *c, ikev2_delete_t * d); +void ikev2_payload_chain_add_padding(ikev2_payload_chain_t * c, int bs); +void ikev2_parse_vendor_payload(ike_payload_header_t * ikep); +ikev2_sa_proposal_t * ikev2_parse_sa_payload(ike_payload_header_t * ikep); +ikev2_ts_t * ikev2_parse_ts_payload(ike_payload_header_t * ikep); +ikev2_delete_t * ikev2_parse_delete_payload(ike_payload_header_t * ikep); +ikev2_notify_t * ikev2_parse_notify_payload(ike_payload_header_t * ikep); + +#endif /* __included_ikev2_priv_h__ */ + diff --git a/vnet/vnet/ipsec/ipsec.c b/vnet/vnet/ipsec/ipsec.c new file mode 100644 index 00000000000..c6a83557ce1 --- /dev/null +++ b/vnet/vnet/ipsec/ipsec.c @@ -0,0 +1,535 @@ +/* + * decap.c : IPSec tunnel support + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> +#include <vnet/interface.h> + +#include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/esp.h> +#include <vnet/ipsec/ikev2.h> + +int +ipsec_set_interface_spd(vlib_main_t * vm, u32 sw_if_index, u32 spd_id, int is_add) +{ + ipsec_main_t *im = &ipsec_main; + ip_lookup_main_t * lm; + ip_config_main_t * rx_cm; + ip4_ipsec_config_t config; + + u32 spd_index, ci; + uword *p; + + p = hash_get (im->spd_index_by_spd_id, spd_id); + if (!p) + return VNET_API_ERROR_SYSCALL_ERROR_1; /* no such spd-id */ + + spd_index = p[0]; + + p = hash_get (im->spd_index_by_sw_if_index, sw_if_index); + if (p && is_add) + return VNET_API_ERROR_SYSCALL_ERROR_1; /* spd already assigned */ + + if (is_add) + { + hash_set (im->spd_index_by_sw_if_index, sw_if_index, spd_index); + } + else + { + hash_unset (im->spd_index_by_sw_if_index, sw_if_index); + } + + clib_warning("sw_if_index %u spd_id %u spd_index %u", + sw_if_index, spd_id, spd_index); + + /* enable IPsec on TX */ + vnet_interface_add_del_feature(im->vnet_main, vm, sw_if_index, + INTF_OUTPUT_FEAT_IPSEC, is_add); + + /* enable IPsec on RX */ + config.spd_index = spd_index; + + /* IPv4 */ + lm = &ip4_main.lookup_main; + rx_cm = &lm->rx_config_mains[VNET_UNICAST]; + + ci = rx_cm->config_index_by_sw_if_index[sw_if_index]; + + ci = (is_add ? vnet_config_add_feature : vnet_config_del_feature) + (vm, &rx_cm->config_main, + ci, + IP4_RX_FEATURE_IPSEC, + &config, + sizeof (config)); + rx_cm->config_index_by_sw_if_index[sw_if_index] = ci; + + /* IPv6 */ + lm = &ip6_main.lookup_main; + rx_cm = &lm->rx_config_mains[VNET_UNICAST]; + + ci = rx_cm->config_index_by_sw_if_index[sw_if_index]; + + ci = (is_add ? vnet_config_add_feature : vnet_config_del_feature) + (vm, &rx_cm->config_main, + ci, + IP6_RX_FEATURE_IPSEC, + &config, + sizeof (config)); + rx_cm->config_index_by_sw_if_index[sw_if_index] = ci; + + return 0; +} + +int +ipsec_add_del_spd(vlib_main_t * vm, u32 spd_id, int is_add) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_spd_t * spd = 0; + uword *p; + u32 spd_index, k, v; + + p = hash_get (im->spd_index_by_spd_id, spd_id); + if (p && is_add) + return VNET_API_ERROR_INVALID_VALUE; + if (!p && !is_add) + return VNET_API_ERROR_INVALID_VALUE; + + if (!is_add) /* delete */ + { + spd_index = p[0]; + spd = pool_elt_at_index(im->spds, spd_index); + if (!spd) + return VNET_API_ERROR_INVALID_VALUE; + hash_foreach (k, v, im->spd_index_by_sw_if_index, ({ + if (v == spd_index) + ipsec_set_interface_spd(vm, k, spd_id, 0); + })); + hash_unset (im->spd_index_by_spd_id, spd_id); + pool_free (spd->policies); + vec_free (spd->ipv4_outbound_policies); + vec_free (spd->ipv6_outbound_policies); + vec_free (spd->ipv4_inbound_protect_policy_indices); + vec_free (spd->ipv4_inbound_policy_discard_and_bypass_indices); + pool_put (im->spds, spd); + } + else /* create new SPD */ + { + pool_get (im->spds, spd); + memset (spd, 0, sizeof (*spd)); + spd_index = spd - im->spds; + spd->id = spd_id; + hash_set (im->spd_index_by_spd_id, spd_id, spd_index); + } + return 0; +} + +static int +ipsec_spd_entry_sort(void * a1, void * a2) +{ + ipsec_main_t *im = &ipsec_main; + u32 * id1 = a1; + u32 * id2 = a2; + ipsec_spd_t * spd; + ipsec_policy_t * p1, * p2; + + pool_foreach (spd, im->spds, ({ + p1 = pool_elt_at_index(spd->policies, *id1); + p2 = pool_elt_at_index(spd->policies, *id2); + if (p1 && p2) + return p2->priority - p1->priority; + })); + + return 0; +} + +int +ipsec_add_del_policy(vlib_main_t * vm, ipsec_policy_t * policy, int 
is_add) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_spd_t * spd = 0; + ipsec_policy_t * vp; + uword *p; + u32 spd_index; + + clib_warning("policy-id %u priority %d is_outbound %u",policy->id, policy->priority, policy->is_outbound); + + if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) + { + p = hash_get(im->sa_index_by_sa_id, policy->sa_id); + if (!p) + return VNET_API_ERROR_SYSCALL_ERROR_1; + policy->sa_index = p[0]; + } + + p = hash_get (im->spd_index_by_spd_id, policy->id); + + if (!p) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + spd_index = p[0]; + spd = pool_elt_at_index(im->spds, spd_index); + if (!spd) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + if (is_add) + { + u32 policy_index; + + pool_get (spd->policies, vp); + memcpy (vp, policy, sizeof (*vp)); + policy_index = vp - spd->policies; + + if (policy->is_outbound) + { + if (policy->is_ipv6) + { + vec_add1 (spd->ipv6_outbound_policies, policy_index); + memcpy(vp, policy, sizeof(ipsec_policy_t)); + vec_sort_with_function (spd->ipv6_outbound_policies, + ipsec_spd_entry_sort); + } + else + { + vec_add1 (spd->ipv4_outbound_policies, policy_index); + memcpy(vp, policy, sizeof(ipsec_policy_t)); + vec_sort_with_function (spd->ipv4_outbound_policies, + ipsec_spd_entry_sort); + } + } + else + { + if (policy->is_ipv6) + { + if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) + { + vec_add1 (spd->ipv6_inbound_protect_policy_indices, + policy_index); + memcpy(vp, policy, sizeof(ipsec_policy_t)); + vec_sort_with_function ( + spd->ipv6_inbound_protect_policy_indices, + ipsec_spd_entry_sort); + } + else + { + vec_add1 (spd->ipv6_inbound_policy_discard_and_bypass_indices, + policy_index); + memcpy(vp, policy, sizeof(ipsec_policy_t)); + vec_sort_with_function ( + spd->ipv6_inbound_policy_discard_and_bypass_indices, + ipsec_spd_entry_sort); + } + } + else + { + if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) + { + vec_add1 (spd->ipv4_inbound_protect_policy_indices, + policy_index); + memcpy(vp, policy, sizeof(ipsec_policy_t)); + vec_sort_with_function ( + spd->ipv4_inbound_protect_policy_indices, + ipsec_spd_entry_sort); + } + else + { + vec_add1 (spd->ipv4_inbound_policy_discard_and_bypass_indices, + policy_index); + memcpy(vp, policy, sizeof(ipsec_policy_t)); + vec_sort_with_function ( + spd->ipv4_inbound_policy_discard_and_bypass_indices, + ipsec_spd_entry_sort); + } + } + } + + } + else + { + u32 i, j; + pool_foreach_index(i, spd->policies, ({ + vp = pool_elt_at_index(spd->policies, i); + if (vp->priority != policy->priority) + continue; + if (vp->is_outbound != policy->is_outbound) + continue; + if (vp->policy != policy->policy) + continue; + if (vp->sa_id != policy->sa_id) + continue; + if (vp->protocol != policy->protocol) + continue; + if (vp->lport.start != policy->lport.start) + continue; + if (vp->lport.stop != policy->lport.stop) + continue; + if (vp->rport.start != policy->rport.start) + continue; + if (vp->rport.stop != policy->rport.stop) + continue; + if (vp->is_ipv6 != policy->is_ipv6) + continue; + if (policy->is_ipv6) + { + if (vp->laddr.start.ip6.as_u64[0] != policy->laddr.start.ip6.as_u64[0]) + continue; + if (vp->laddr.start.ip6.as_u64[1] != policy->laddr.start.ip6.as_u64[1]) + continue; + if (vp->laddr.stop.ip6.as_u64[0] != policy->laddr.stop.ip6.as_u64[0]) + continue; + if (vp->laddr.stop.ip6.as_u64[1] != policy->laddr.stop.ip6.as_u64[1]) + continue; + if (vp->raddr.start.ip6.as_u64[0] != policy->raddr.start.ip6.as_u64[0]) + continue; + if (vp->raddr.start.ip6.as_u64[1] != policy->raddr.start.ip6.as_u64[1]) + continue; + 
if (vp->raddr.stop.ip6.as_u64[0] != policy->raddr.stop.ip6.as_u64[0]) + continue; + if (vp->laddr.stop.ip6.as_u64[1] != policy->laddr.stop.ip6.as_u64[1]) + continue; + if (policy->is_outbound) + { + vec_foreach_index(j, spd->ipv6_outbound_policies) { + if (vec_elt(spd->ipv6_outbound_policies, j) == i) { + vec_del1 (spd->ipv6_outbound_policies, j); + break; + } + } + } + else + { + if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) + { + vec_foreach_index(j, spd->ipv6_inbound_protect_policy_indices) { + if (vec_elt(spd->ipv6_inbound_protect_policy_indices, j) == i) { + vec_del1 (spd->ipv6_inbound_protect_policy_indices, j); + break; + } + } + } + else + { + vec_foreach_index(j, spd->ipv6_inbound_policy_discard_and_bypass_indices) { + if (vec_elt(spd->ipv6_inbound_policy_discard_and_bypass_indices, j) == i) { + vec_del1 (spd->ipv6_inbound_policy_discard_and_bypass_indices, j); + break; + } + } + } + } + } + else + { + if (vp->laddr.start.ip4.as_u32 != policy->laddr.start.ip4.as_u32) + continue; + if (vp->laddr.stop.ip4.as_u32 != policy->laddr.stop.ip4.as_u32) + continue; + if (vp->raddr.start.ip4.as_u32 != policy->raddr.start.ip4.as_u32) + continue; + if (vp->raddr.stop.ip4.as_u32 != policy->raddr.stop.ip4.as_u32) + continue; + if (policy->is_outbound) + { + vec_foreach_index(j, spd->ipv4_outbound_policies) { + if (vec_elt(spd->ipv4_outbound_policies, j) == i) { + vec_del1 (spd->ipv4_outbound_policies, j); + break; + } + } + } + else + { + if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) + { + vec_foreach_index(j, spd->ipv4_inbound_protect_policy_indices) { + if (vec_elt(spd->ipv4_inbound_protect_policy_indices, j) == i) { + vec_del1 (spd->ipv4_inbound_protect_policy_indices, j); + break; + } + } + } + else + { + vec_foreach_index(j, spd->ipv4_inbound_policy_discard_and_bypass_indices) { + if (vec_elt(spd->ipv4_inbound_policy_discard_and_bypass_indices, j) == i) { + vec_del1 (spd->ipv4_inbound_policy_discard_and_bypass_indices, j); + break; + } + } + } + } + pool_put (spd->policies, vp); + break; + } + })); + } + + return 0; +} + +static u8 +ipsec_is_sa_used(u32 sa_index) +{ + ipsec_main_t * im = &ipsec_main; + ipsec_spd_t * spd; + ipsec_policy_t * p; + + pool_foreach(spd, im->spds, ({ + pool_foreach(p, spd->policies, ({ + if (p->policy == IPSEC_POLICY_ACTION_PROTECT) + { + if (p->sa_index == sa_index) + return 1; + } + })); + })); + + return 0; +} + +int +ipsec_add_del_sa(vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_sa_t * sa = 0; + uword *p; + u32 sa_index; + + clib_warning("id %u spi %u", new_sa->id, new_sa->spi); + + p = hash_get (im->sa_index_by_sa_id, new_sa->id); + if (p && is_add) + return VNET_API_ERROR_SYSCALL_ERROR_1; /* already exists */ + if (!p && !is_add) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + if (!is_add) /* delete */ + { + sa_index = p[0]; + sa = pool_elt_at_index(im->sad, sa_index); + if (ipsec_is_sa_used(sa_index)) + { + clib_warning("sa_id %u used in policy", sa->id); + return VNET_API_ERROR_SYSCALL_ERROR_1; /* sa used in policy */ + } + hash_unset (im->sa_index_by_sa_id, sa->id); + pool_put (im->sad, sa); + } + else /* create new SA */ + { + pool_get (im->sad, sa); + memcpy (sa, new_sa, sizeof (*sa)); + sa_index = sa - im->sad; + hash_set (im->sa_index_by_sa_id, sa->id, sa_index); + } + return 0; +} + +int +ipsec_set_sa_key(vlib_main_t * vm, ipsec_sa_t * sa_update) +{ + ipsec_main_t *im = &ipsec_main; + uword *p; + u32 sa_index; + ipsec_sa_t * sa = 0; + + p = hash_get (im->sa_index_by_sa_id, sa_update->id); + if 
(!p) + return VNET_API_ERROR_SYSCALL_ERROR_1; /* no such sa-id */ + + sa_index = p[0]; + sa = pool_elt_at_index(im->sad, sa_index); + + /* new crypto key */ + if (0 < sa_update->crypto_key_len) + { + memcpy(sa->crypto_key, sa_update->crypto_key, sa_update->crypto_key_len); + sa->crypto_key_len = sa_update->crypto_key_len; + } + + /* new integ key */ + if (0 < sa_update->integ_key_len) + { + memcpy(sa->integ_key, sa_update->integ_key, sa_update->integ_key_len); + sa->integ_key_len = sa_update->integ_key_len; + } + + return 0; +} + +static void +ipsec_rand_seed(void) +{ + struct { + time_t time; + pid_t pid; + void * p; + } seed_data; + + seed_data.time = time(NULL); + seed_data.pid = getpid(); + seed_data.p = (void *)&seed_data; + + RAND_seed((const void *)&seed_data, sizeof(seed_data)); +} + +static clib_error_t * +ipsec_init (vlib_main_t * vm) +{ + clib_error_t * error; + ipsec_main_t * im = &ipsec_main; + vlib_node_t * node; + + ipsec_rand_seed(); + + memset (im, 0, sizeof (im[0])); + + im->vnet_main = vnet_get_main(); + im->vlib_main = vm; + + im->spd_index_by_spd_id = hash_create (0, sizeof (uword)); + im->sa_index_by_sa_id = hash_create (0, sizeof (uword)); + im->spd_index_by_sw_if_index = hash_create (0, sizeof (uword)); + + node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + ASSERT(node); + im->error_drop_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "esp-encrypt"); + ASSERT(node); + im->esp_encrypt_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup"); + ASSERT(node); + im->ip4_lookup_node_index = node->index; + + + if ((error = vlib_call_init_function (vm, ipsec_cli_init))) + return error; + + if ((error = vlib_call_init_function (vm, ipsec_tunnel_if_init))) + return error; + + esp_init(); + + if ((error = ikev2_init (vm))) + return error; + + return 0; +} + +VLIB_INIT_FUNCTION (ipsec_init); diff --git a/vnet/vnet/ipsec/ipsec.h b/vnet/vnet/ipsec/ipsec.h new file mode 100644 index 00000000000..6ef36d02855 --- /dev/null +++ b/vnet/vnet/ipsec/ipsec.h @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/devices/dpdk/dpdk.h> + +#define foreach_ipsec_policy_action \ + _(0, BYPASS, "bypass") \ + _(1, DISCARD, "discard") \ + _(2, RESOLVE, "resolve") \ + _(3, PROTECT, "protect") + +typedef enum { +#define _(v,f,s) IPSEC_POLICY_ACTION_##f = v, + foreach_ipsec_policy_action +#undef _ + IPSEC_POLICY_N_ACTION, +} ipsec_policy_action_t; + +#define foreach_ipsec_crypto_alg \ + _(0, NONE, "none") \ + _(1, AES_CBC_128, "aes-cbc-128") \ + _(2, AES_CBC_192, "aes-cbc-192") \ + _(3, AES_CBC_256, "aes-cbc-256") + +typedef enum { +#define _(v,f,s) IPSEC_CRYPTO_ALG_##f = v, + foreach_ipsec_crypto_alg +#undef _ + IPSEC_CRYPTO_N_ALG, +} ipsec_crypto_alg_t; + +#define foreach_ipsec_integ_alg \ + _(0, NONE, "none") \ + _(1, MD5_96, "md5-96") /* RFC2403 */ \ + _(2, SHA1_96, "sha1-96") /* RFC2404 */ \ + _(3, SHA_256_96, "sha-256-96") /* draft-ietf-ipsec-ciph-sha-256-00 */ \ + _(4, SHA_256_128, "sha-256-128") /* RFC4868 */ \ + _(5, SHA_384_192, "sha-384-192") /* RFC4868 */ \ + _(6, SHA_512_256, "sha-512-256") /* RFC4868 */ + +typedef enum { +#define _(v,f,s) IPSEC_INTEG_ALG_##f = v, + foreach_ipsec_integ_alg +#undef _ + IPSEC_INTEG_N_ALG, +} ipsec_integ_alg_t; + +typedef enum { + IPSEC_PROTOCOL_AH = 0, + IPSEC_PROTOCOL_ESP = 1 +} ipsec_protocol_t; + +typedef struct { + u32 id; + u32 spi; + ipsec_protocol_t protocol; + + ipsec_crypto_alg_t crypto_alg; + u8 crypto_key_len; + u8 crypto_key[128]; + + ipsec_integ_alg_t integ_alg; + u8 integ_key_len; + u8 integ_key[128]; + + u8 use_esn; + u8 use_anti_replay; + + u8 is_tunnel; + u8 is_tunnel_ip6; + ip46_address_t tunnel_src_addr; + ip46_address_t tunnel_dst_addr; + + /* runtime */ + u32 seq; + u32 seq_hi; + u32 last_seq; + u32 last_seq_hi; + u64 replay_window; +} ipsec_sa_t; + +typedef struct { + ip46_address_t start, stop; +} ip46_address_range_t; + +typedef struct { + u16 start, stop; +} port_range_t; + +typedef struct { + u8 is_add; + u8 esn; + u8 anti_replay; + ip4_address_t local_ip, remote_ip; + u32 local_spi; + u32 remote_spi; +} ipsec_add_del_tunnel_args_t; + +typedef enum { + IPSEC_IF_SET_KEY_TYPE_NONE, + IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO, + IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO, + IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG, + IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG, +} ipsec_if_set_key_type_t; + +typedef struct { + u32 id; + i32 priority; + u8 is_outbound; + + // Selector + u8 is_ipv6; + ip46_address_range_t laddr; + ip46_address_range_t raddr; + u8 protocol; + port_range_t lport; + port_range_t rport; + + // Policy + u8 policy; + u32 sa_id; + u32 sa_index; + + // Counter + vlib_counter_t counter; +} ipsec_policy_t; + +typedef struct { + u32 id; + /* pool of policies */ + ipsec_policy_t * policies; + /* vectors of policy indices */ + u32 * ipv4_outbound_policies; + u32 * ipv6_outbound_policies; + u32 * ipv4_inbound_protect_policy_indices; + u32 * ipv4_inbound_policy_discard_and_bypass_indices; + u32 * ipv6_inbound_protect_policy_indices; + u32 * ipv6_inbound_policy_discard_and_bypass_indices; +} ipsec_spd_t; + +typedef struct { + u32 spd_index; +} ip4_ipsec_config_t; + +typedef struct { + u32 spd_index; +} ip6_ipsec_config_t; + +typedef struct { + u32 input_sa_index; + u32 output_sa_index; + u32 hw_if_index; +} ipsec_tunnel_if_t; + +typedef struct { + /* pool of tunnel instances */ + ipsec_spd_t * spds; + ipsec_sa_t * sad; + + /* pool of tunnel interfaces */ + ipsec_tunnel_if_t * tunnel_interfaces; + u32 * free_tunnel_if_indices; + + u32 * empty_buffers; + + uword * tunnel_index_by_key; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t 
*vnet_main; + + /* next node indices */ + u32 feature_next_node_index[32]; + + /* hashes */ + uword * spd_index_by_spd_id; + uword * spd_index_by_sw_if_index; + uword * sa_index_by_sa_id; + uword * ipsec_if_pool_index_by_key; + + /* node indexes */ + u32 error_drop_node_index; + u32 ip4_lookup_node_index; + u32 esp_encrypt_node_index; + +} ipsec_main_t; + +ipsec_main_t ipsec_main; + +vlib_node_registration_t ipsec_input_ip4_node; +vlib_node_registration_t ipsec_input_ip6_node; +vlib_node_registration_t ipsec_output_node; +vlib_node_registration_t esp_encrypt_node; +vlib_node_registration_t esp_decrypt_node; +vlib_node_registration_t ipsec_if_output_node; +vlib_node_registration_t ipsec_if_input_node; + + +/* + * functions + */ +int ipsec_set_interface_spd(vlib_main_t * vm, u32 sw_if_index, u32 spd_id, int is_add); +int ipsec_add_del_spd(vlib_main_t * vm, u32 spd_id, int is_add); +int ipsec_add_del_policy(vlib_main_t * vm, ipsec_policy_t * policy, int is_add); +int ipsec_add_del_sa(vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add); +int ipsec_set_sa_key(vlib_main_t * vm, ipsec_sa_t * sa_update); + +u8 * format_ipsec_if_output_trace (u8 * s, va_list * args); +u8 * format_ipsec_policy_action (u8 * s, va_list * args); +u8 * format_ipsec_crypto_alg (u8 * s, va_list * args); +u8 * format_ipsec_integ_alg (u8 * s, va_list * args); +u8 * format_ipsec_replay_window(u8 * s, va_list * args); +uword unformat_ipsec_policy_action (unformat_input_t * input, va_list * args); +uword unformat_ipsec_crypto_alg (unformat_input_t * input, va_list * args); +uword unformat_ipsec_integ_alg (unformat_input_t * input, va_list * args); + +u32 ipsec_add_del_tunnel_if (vnet_main_t * vnm, ipsec_add_del_tunnel_args_t * args); +int ipsec_set_interface_key(vnet_main_t * vnm, u32 hw_if_index, ipsec_if_set_key_type_t type, u8 alg, u8 * key); + + +/* + * inline functions + */ + +always_inline void +ipsec_alloc_empty_buffers(vlib_main_t * vm, ipsec_main_t *im) +{ + dpdk_main_t * dm = &dpdk_main; + u32 free_list_index = dm->vlib_buffer_free_list_index; + + uword l = vec_len (im->empty_buffers); + uword n_alloc = 0; + + if (PREDICT_FALSE(l < VLIB_FRAME_SIZE)) + { + if (!im->empty_buffers) { + vec_alloc (im->empty_buffers, 2 * VLIB_FRAME_SIZE ); + } + + n_alloc = vlib_buffer_alloc_from_free_list (vm, im->empty_buffers + l, + 2 * VLIB_FRAME_SIZE - l, + free_list_index); + + _vec_len (im->empty_buffers) = l + n_alloc; + } +} + +static_always_inline u32 /* FIXME move to interface???.h */ +get_next_output_feature_node_index( vnet_main_t * vnm, + vlib_buffer_t * b) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_node_t * node; + u32 r; + intf_output_feat_t next_feature; + + u8 * node_names[] = { +#define _(sym, str) (u8 *) str, + foreach_intf_output_feat +#undef _ + }; + + count_trailing_zeros(next_feature, vnet_buffer(b)->output_features.bitmap); + + if (next_feature >= INTF_OUTPUT_FEAT_DONE) + { + u32 sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_TX]; + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface(vnm, sw_if_index); + r = hw->output_node_index; + } + else + { + vnet_buffer(b)->output_features.bitmap &= ~(1 << next_feature); + /* FIXME */ + node = vlib_get_node_by_name(vm, node_names[next_feature]); + r = node->index; + } + + return r; +} diff --git a/vnet/vnet/ipsec/ipsec_cli.c b/vnet/vnet/ipsec/ipsec_cli.c new file mode 100644 index 00000000000..0205d8bc877 --- /dev/null +++ b/vnet/vnet/ipsec/ipsec_cli.c @@ -0,0 +1,710 @@ +/* + * decap.c : IPSec tunnel support + * + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> +#include <vnet/interface.h> + +#include <vnet/ipsec/ipsec.h> + +static clib_error_t * +set_interface_spd_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ipsec_main_t *im = &ipsec_main; + u32 sw_if_index = (u32) ~0; + u32 spd_id; + int is_add = 1; + + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (unformat (line_input, "%U %u", unformat_vnet_sw_interface, im->vnet_main, + &sw_if_index, &spd_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + + unformat_free (line_input); + + ipsec_set_interface_spd(vm, sw_if_index, spd_id, is_add); + + return 0; +} + +VLIB_CLI_COMMAND (set_interface_spd_command, static) = { + .path = "set interface ipsec spd", + .short_help = + "set interface ipsec spd <int> <id>", + .function = set_interface_spd_command_fn, +}; + +static clib_error_t * +ipsec_sa_add_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ipsec_sa_t sa; + int is_add = ~0; + u8 * ck, * ik; + + memset(&sa, 0, sizeof(sa)); + + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "add %u", &sa.id)) + is_add = 1; + else if (unformat (line_input, "del %u", &sa.id)) + is_add = 0; + else if (unformat (line_input, "spi %u", &sa.spi)) + ; + else if (unformat (line_input, "esp")) + sa.protocol = IPSEC_PROTOCOL_ESP; + else if (unformat (line_input, "ah")) + //sa.protocol = IPSEC_PROTOCOL_AH; + return clib_error_return(0, "unsupported security protocol 'AH'"); + else if (unformat (line_input, "crypto-key %U", unformat_hex_string, &ck)) + sa.crypto_key_len = vec_len (ck); + else if (unformat (line_input, "crypto-alg %U", unformat_ipsec_crypto_alg, + &sa.crypto_alg)) + { + if (sa.crypto_alg < IPSEC_CRYPTO_ALG_AES_CBC_128 || + sa.crypto_alg > IPSEC_CRYPTO_ALG_AES_CBC_256) + return clib_error_return(0, "unsupported crypto-alg: '%U'", + format_ipsec_crypto_alg, sa.crypto_alg); + } + else if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik)) + sa.integ_key_len = vec_len (ik); + else if (unformat (line_input, "integ-alg %U", unformat_ipsec_integ_alg, + &sa.integ_alg)) + { + if (sa.integ_alg < IPSEC_INTEG_ALG_SHA1_96 || + sa.integ_alg > IPSEC_INTEG_ALG_SHA_512_256) + return clib_error_return(0, "unsupported integ-alg: '%U'", + format_ipsec_integ_alg, sa.integ_alg); + } + else if (unformat (line_input, "tunnel-src %U", + unformat_ip4_address, &sa.tunnel_src_addr.ip4)) + sa.is_tunnel = 1; + else if (unformat (line_input, "tunnel-dst %U", + unformat_ip4_address, &sa.tunnel_dst_addr.ip4)) + sa.is_tunnel = 1; + else if (unformat (line_input, "tunnel-src %U", + unformat_ip6_address, &sa.tunnel_src_addr.ip6)) + { sa.is_tunnel = 1; sa.is_tunnel_ip6 = 1; } + else if (unformat (line_input, "tunnel-dst %U", + unformat_ip6_address, &sa.tunnel_dst_addr.ip6)) + { sa.is_tunnel = 1; sa.is_tunnel_ip6 = 1; } + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (sa.crypto_key_len > sizeof(sa.crypto_key)) + sa.crypto_key_len = sizeof(sa.crypto_key); + + if (sa.integ_key_len > sizeof(sa.integ_key)) + sa.integ_key_len = sizeof(sa.integ_key); + + if (ck) + strncpy((char *) sa.crypto_key, (char *) ck, sa.crypto_key_len); + + if (ik) + strncpy((char *) sa.integ_key, (char *) ik, sa.integ_key_len); + + ipsec_add_del_sa(vm, &sa, is_add); + + return 0; +} + +VLIB_CLI_COMMAND (ipsec_sa_add_del_command, static) = { + .path = "ipsec sa", + .short_help = + "ipsec sa [add|del]", + .function = ipsec_sa_add_del_command_fn, +}; + +static clib_error_t * +ipsec_spd_add_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + u32 spd_id; + int is_add = ~0; + + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "add")) + is_add = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "%u", &spd_id)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + ipsec_add_del_spd(vm, spd_id, is_add); + + return 0; +} + +VLIB_CLI_COMMAND (ipsec_spd_add_del_command, static) = { + .path = "ipsec spd", + .short_help = + "ipsec spd [add|del] <id>", + .function = ipsec_spd_add_del_command_fn, +}; + + +static clib_error_t * +ipsec_policy_add_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ipsec_policy_t p; + int is_add = 0; + int is_ip_any = 1; + u32 tmp, tmp2; + + memset(&p, 0, sizeof(p)); + p.lport.stop = p.rport.stop = ~0; + p.laddr.stop.ip4.as_u32 = p.raddr.stop.ip4.as_u32 = (u32) ~0; + p.laddr.stop.ip6.as_u64[0] = p.laddr.stop.ip6.as_u64[1] = (u64) ~0; + p.raddr.stop.ip6.as_u64[0] = p.raddr.stop.ip6.as_u64[1] = (u64) ~0; + + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "add")) + is_add = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "spd %u", &p.id)) + ; + else if (unformat (line_input, "inbound")) + p.is_outbound = 0; + else if (unformat (line_input, "outbound")) + p.is_outbound = 1; + else if (unformat (line_input, "priority %d", &p.priority)) + ; + else if (unformat (line_input, "protocol %u", &tmp)) + p.protocol = (u8) tmp; + else if (unformat (line_input, "action %U", unformat_ipsec_policy_action, + &p.policy)) + { + if (p.policy == IPSEC_POLICY_ACTION_RESOLVE) + return clib_error_return(0, "unsupported action: 'resolve'"); + } + else if (unformat (line_input, "sa %u", &p.sa_id)) + ; + else if (unformat (line_input, "local-ip-range %U - %U", + unformat_ip4_address, &p.laddr.start.ip4, + unformat_ip4_address, &p.laddr.stop.ip4)) + is_ip_any = 0; + else if (unformat (line_input, "remote-ip-range %U - %U", + unformat_ip4_address, &p.raddr.start.ip4, + unformat_ip4_address, &p.raddr.stop.ip4)) + is_ip_any = 0; + else if (unformat (line_input, "local-ip-range %U - %U", + unformat_ip6_address, &p.laddr.start.ip6, + unformat_ip6_address, &p.laddr.stop.ip6)) + { + p.is_ipv6 = 1; + is_ip_any = 0; + } + else if (unformat (line_input, "remote-ip-range %U - %U", + unformat_ip6_address, &p.raddr.start.ip6, + unformat_ip6_address, &p.raddr.stop.ip6)) + { + p.is_ipv6 = 1; + is_ip_any = 0; + } + else if (unformat (line_input, "local-port-range %u - %u", &tmp, &tmp2)) + { p.lport.start = tmp; p.lport.stop = tmp2; } + else if (unformat (line_input, "remote-port-range %u - %u", &tmp, &tmp2)) + { p.rport.start = tmp; p.rport.stop = tmp2; } + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + ipsec_add_del_policy(vm, &p, is_add); + if (is_ip_any) + { + p.is_ipv6 = 1; + ipsec_add_del_policy(vm, &p, is_add); + } + return 0; +} + +VLIB_CLI_COMMAND (ipsec_policy_add_del_command, static) = { + .path = "ipsec policy", + .short_help = + "ipsec policy [add|del] spd <id> priority <n> ", + .function = ipsec_policy_add_del_command_fn, +}; + +static clib_error_t * 
+set_ipsec_sa_key_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ipsec_sa_t sa; + u8 * ck, * ik; + + memset(&sa, 0, sizeof(sa)); + + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "%u", &sa.id)) + ; + else if (unformat (line_input, "crypto-key %U", unformat_hex_string, &ck)) + sa.crypto_key_len = vec_len (ck); + else if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik)) + sa.integ_key_len = vec_len (ik); + else + return clib_error_return (0, "parse error: '%U'", format_unformat_error, + line_input); + } + + unformat_free (line_input); + + if (sa.crypto_key_len > sizeof(sa.crypto_key)) + sa.crypto_key_len = sizeof(sa.crypto_key); + + if (sa.integ_key_len > sizeof(sa.integ_key)) + sa.integ_key_len = sizeof(sa.integ_key); + + if (ck) + strncpy((char *) sa.crypto_key, (char *) ck, sa.crypto_key_len); + + if (ik) + strncpy((char *) sa.integ_key, (char *) ik, sa.integ_key_len); + + ipsec_set_sa_key(vm, &sa); + + return 0; +} + +VLIB_CLI_COMMAND (set_ipsec_sa_key_command, static) = { + .path = "set ipsec sa", + .short_help = + "set ipsec sa <id> crypto-key <key> integ-key <key>", + .function = set_ipsec_sa_key_command_fn, +}; + +static clib_error_t * +show_ipsec_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ipsec_spd_t * spd; + ipsec_sa_t * sa; + ipsec_policy_t * p; + ipsec_main_t * im = &ipsec_main; + u32 * i; + ipsec_tunnel_if_t * t; + vnet_hw_interface_t * hi; + + pool_foreach (sa, im->sad, ({ + if (sa->id) { + vlib_cli_output(vm, "sa %u spi %u mode %s protocol %s", sa->id, sa->spi, + sa->is_tunnel ? "tunnel" : "transport", + sa->protocol ? "esp" : "ah"); + if (sa->protocol == IPSEC_PROTOCOL_ESP) { + vlib_cli_output(vm, " crypto alg %U%s%U integrity alg %U%s%U", + format_ipsec_crypto_alg, sa->crypto_alg, + sa->crypto_alg ? " key " : "", + format_hex_bytes, sa->crypto_key, sa->crypto_key_len, + format_ipsec_integ_alg, sa->integ_alg, + sa->integ_alg ? " key " : "", + format_hex_bytes, sa->integ_key, sa->integ_key_len); + } + if (sa->is_tunnel && sa->is_tunnel_ip6) { + vlib_cli_output(vm, " tunnel src %U dst %U", + format_ip6_address, &sa->tunnel_src_addr.ip6, + format_ip6_address, &sa->tunnel_dst_addr.ip6); + } else if (sa->is_tunnel) { + vlib_cli_output(vm, " tunnel src %U dst %U", + format_ip4_address, &sa->tunnel_src_addr.ip4, + format_ip4_address, &sa->tunnel_dst_addr.ip4); + } + } + })); + + pool_foreach (spd, im->spds, ({ + vlib_cli_output(vm, "spd %u", spd->id); + + vlib_cli_output(vm, " outbound policies"); + vec_foreach(i, spd->ipv4_outbound_policies) + { + p = pool_elt_at_index(spd->policies, *i); + vlib_cli_output(vm, " priority %d action %U protocol %s%s", + p->priority, + format_ipsec_policy_action, p->policy, + p->protocol ? + format(0, "%U", format_ip_protocol, p->protocol) : + (u8 *) "any", + p->policy == IPSEC_POLICY_ACTION_PROTECT ? 
+ format(0, " sa %u", p->sa_id) : + (u8 *) ""); + vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", + format_ip4_address, &p->laddr.start.ip4, + format_ip4_address, &p->laddr.stop.ip4, + p->lport.start, p->lport.stop); + vlib_cli_output(vm, " remte addr range %U - %U port range %u - %u", + format_ip4_address, &p->raddr.start.ip4, + format_ip4_address, &p->raddr.stop.ip4, + p->rport.start, p->rport.stop); + vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, + p->counter.bytes); + }; + vec_foreach(i, spd->ipv6_outbound_policies) + { + p = pool_elt_at_index(spd->policies, *i); + vlib_cli_output(vm, " priority %d action %U protocol %s%s", + p->priority, + format_ipsec_policy_action, p->policy, + p->protocol ? + format(0, "%U", format_ip_protocol, p->protocol) : + (u8 *) "any", + p->policy == IPSEC_POLICY_ACTION_PROTECT ? + format(0, " sa %u", p->sa_id) : + (u8 *) ""); + vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", + format_ip6_address, &p->laddr.start.ip6, + format_ip6_address, &p->laddr.stop.ip6, + p->lport.start, p->lport.stop); + vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", + format_ip6_address, &p->raddr.start.ip6, + format_ip6_address, &p->raddr.stop.ip6, + p->rport.start, p->rport.stop); + vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, + p->counter.bytes); + }; + vlib_cli_output(vm, " inbound policies"); + vec_foreach(i, spd->ipv4_inbound_protect_policy_indices) + { + p = pool_elt_at_index(spd->policies, *i); + vlib_cli_output(vm, " priority %d action %U protocol %s%s", + p->priority, + format_ipsec_policy_action, p->policy, + p->protocol ? + format(0, "%U", format_ip_protocol, p->protocol) : + (u8 *) "any", + p->policy == IPSEC_POLICY_ACTION_PROTECT ? + format(0, " sa %u", p->sa_id) : + (u8 *) ""); + vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", + format_ip4_address, &p->laddr.start.ip4, + format_ip4_address, &p->laddr.stop.ip4, + p->lport.start, p->lport.stop); + vlib_cli_output(vm, " remte addr range %U - %U port range %u - %u", + format_ip4_address, &p->raddr.start.ip4, + format_ip4_address, &p->raddr.stop.ip4, + p->rport.start, p->rport.stop); + vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, + p->counter.bytes); + }; + vec_foreach(i, spd->ipv4_inbound_policy_discard_and_bypass_indices) + { + p = pool_elt_at_index(spd->policies, *i); + vlib_cli_output(vm, " priority %d action %U protocol %s%s", + p->priority, + format_ipsec_policy_action, p->policy, + p->protocol ? + format(0, "%U", format_ip_protocol, p->protocol) : + (u8 *) "any", + p->policy == IPSEC_POLICY_ACTION_PROTECT ? + format(0, " sa %u", p->sa_id) : + (u8 *) ""); + vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", + format_ip4_address, &p->laddr.start.ip4, + format_ip4_address, &p->laddr.stop.ip4, + p->lport.start, p->lport.stop); + vlib_cli_output(vm, " remte addr range %U - %U port range %u - %u", + format_ip4_address, &p->raddr.start.ip4, + format_ip4_address, &p->raddr.stop.ip4, + p->rport.start, p->rport.stop); + vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, + p->counter.bytes); + }; + vec_foreach(i, spd->ipv6_inbound_protect_policy_indices) + { + p = pool_elt_at_index(spd->policies, *i); + vlib_cli_output(vm, " priority %d action %U protocol %s%s", + p->priority, + format_ipsec_policy_action, p->policy, + p->protocol ? + format(0, "%U", format_ip_protocol, p->protocol) : + (u8 *) "any", + p->policy == IPSEC_POLICY_ACTION_PROTECT ? 
+ format(0, " sa %u", p->sa_id) : + (u8 *) ""); + vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", + format_ip6_address, &p->laddr.start.ip6, + format_ip6_address, &p->laddr.stop.ip6, + p->lport.start, p->lport.stop); + vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", + format_ip6_address, &p->raddr.start.ip6, + format_ip6_address, &p->raddr.stop.ip6, + p->rport.start, p->rport.stop); + vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, + p->counter.bytes); + }; + vec_foreach(i, spd->ipv6_inbound_policy_discard_and_bypass_indices) + { + p = pool_elt_at_index(spd->policies, *i); + vlib_cli_output(vm, " priority %d action %U protocol %s%s", + p->priority, + format_ipsec_policy_action, p->policy, + p->protocol ? + format(0, "%U", format_ip_protocol, p->protocol) : + (u8 *) "any", + p->policy == IPSEC_POLICY_ACTION_PROTECT ? + format(0, " sa %u", p->sa_id) : + (u8 *) ""); + vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", + format_ip6_address, &p->laddr.start.ip6, + format_ip6_address, &p->laddr.stop.ip6, + p->lport.start, p->lport.stop); + vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", + format_ip6_address, &p->raddr.start.ip6, + format_ip6_address, &p->raddr.stop.ip6, + p->rport.start, p->rport.stop); + vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, + p->counter.bytes); + }; + })); + + vlib_cli_output(vm, "tunnel interfaces"); + pool_foreach (t, im->tunnel_interfaces, ({ + hi = vnet_get_hw_interface (im->vnet_main, t->hw_if_index); + vlib_cli_output(vm, " %s seq", hi->name); + sa = pool_elt_at_index(im->sad, t->output_sa_index); + vlib_cli_output(vm, " seq %u seq-hi %u esn %u anti-replay %u", + sa->seq, sa->seq_hi, sa->use_esn, sa->use_anti_replay); + vlib_cli_output(vm, " local-spi %u local-ip %U", sa->spi, + format_ip4_address, &sa->tunnel_src_addr.ip4); + vlib_cli_output(vm, " local-crypto %U %U", + format_ipsec_crypto_alg, sa->crypto_alg, + format_hex_bytes, sa->crypto_key, sa->crypto_key_len); + vlib_cli_output(vm, " local-integrity %U %U", + format_ipsec_integ_alg, sa->integ_alg, + format_hex_bytes, sa->integ_key, sa->integ_key_len); + sa = pool_elt_at_index(im->sad, t->input_sa_index); + vlib_cli_output(vm, " last-seq %u last-seq-hi %u esn %u anti-replay %u window %U", + sa->last_seq, sa->last_seq_hi, sa->use_esn, + sa->use_anti_replay, + format_ipsec_replay_window, sa->replay_window); + vlib_cli_output(vm, " remote-spi %u remote-ip %U", sa->spi, + format_ip4_address, &sa->tunnel_src_addr.ip4); + vlib_cli_output(vm, " remote-crypto %U %U", + format_ipsec_crypto_alg, sa->crypto_alg, + format_hex_bytes, sa->crypto_key, sa->crypto_key_len); + vlib_cli_output(vm, " remote-integrity %U %U", + format_ipsec_integ_alg, sa->integ_alg, + format_hex_bytes, sa->integ_key, sa->integ_key_len); + })); + return 0; +} + +VLIB_CLI_COMMAND (show_ipsec_command, static) = { + .path = "show ipsec", + .short_help = "show ipsec", + .function = show_ipsec_command_fn, +}; + +static clib_error_t * +clear_ipsec_counters_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ipsec_main_t * im = &ipsec_main; + ipsec_spd_t * spd; + ipsec_policy_t * p; + + pool_foreach (spd, im->spds, ({ + pool_foreach(p, spd->policies, ({ + p->counter.packets = p->counter.bytes = 0; + })); + })); + + return 0; +} + +VLIB_CLI_COMMAND (clear_ipsec_counters_command, static) = { + .path = "clear ipsec counters", + .short_help = "clear ipsec counters", + .function = 
clear_ipsec_counters_command_fn, +}; + +static clib_error_t * +create_ipsec_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ipsec_add_del_tunnel_args_t a; + ipsec_main_t *im = &ipsec_main; + int rv; + u32 num_m_args = 0; + a.is_add = 1; + a.anti_replay = 0; + a.esn = 0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "local-ip %U", unformat_ip4_address, &a.local_ip)) + num_m_args++; + else if (unformat (line_input, "remote-ip %U", unformat_ip4_address, &a.remote_ip)) + num_m_args++; + else if (unformat (line_input, "local-spi %u", &a.local_spi)) + num_m_args++; + else if (unformat (line_input, "remote-spi %u", &a.remote_spi)) + num_m_args++; + else if (unformat (line_input, "del")) + a.is_add = 0; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (num_m_args < 4) + return clib_error_return (0, "mandatory argument(s) missing"); + + rv = ipsec_add_del_tunnel_if (im->vnet_main, &a); + + switch(rv) + { + case 0: + break; + case VNET_API_ERROR_INVALID_VALUE: + if (a.is_add) + return clib_error_return (0, "IPSec tunnel interface already exists..."); + else + return clib_error_return (0, "IPSec tunnel interface not exists..."); + default: + return clib_error_return (0, "ipsec_register_interface returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (create_ipsec_tunnel_command, static) = { + .path = "create ipsec tunnel", + .short_help = "create ipsec tunnel local-ip <addr> local-spi <spi> remote-ip <addr> remote-spi <spi>", + .function = create_ipsec_tunnel_command_fn, +}; + +static clib_error_t * +set_interface_key_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ipsec_main_t *im = &ipsec_main; + ipsec_if_set_key_type_t type = IPSEC_IF_SET_KEY_TYPE_NONE; + u32 hw_if_index = (u32) ~0; + u32 alg; + u8 * key = 0; + + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", + unformat_vnet_hw_interface, im->vnet_main, &hw_if_index)) + ; + else if (unformat (line_input, "local crypto %U", unformat_ipsec_crypto_alg, &alg)) + type = IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO; + else if (unformat (line_input, "remote crypto %U", unformat_ipsec_crypto_alg, &alg)) + type = IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO; + else if (unformat (line_input, "local integ %U", unformat_ipsec_integ_alg, &alg)) + type = IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG; + else if (unformat (line_input, "remote integ %U", unformat_ipsec_integ_alg, &alg)) + type = IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG; + else if (unformat (line_input, "%U", unformat_hex_string, &key)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (type == IPSEC_IF_SET_KEY_TYPE_NONE) + return clib_error_return (0, "unknown key type"); + + if (alg > 0 && vec_len(key)==0) + return clib_error_return (0, "key is not specified"); + + if (hw_if_index == (u32) ~0) + return clib_error_return (0, "interface not specified"); + + ipsec_set_interface_key(im->vnet_main, hw_if_index, type, alg, key); + vec_free(key); + + return 0; +} + +VLIB_CLI_COMMAND (set_interface_key_command, static) = { + .path = "set interface ipsec key", + .short_help = + "set interface ipsec key <int> <local|remote> <crypto|integ> <key type> <key>", + .function = set_interface_key_command_fn, +}; + + +clib_error_t * +ipsec_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (ipsec_cli_init); + diff --git a/vnet/vnet/ipsec/ipsec_format.c b/vnet/vnet/ipsec/ipsec_format.c new file mode 100644 index 00000000000..f3720abf6c3 --- /dev/null +++ b/vnet/vnet/ipsec/ipsec_format.c @@ -0,0 +1,133 @@ +/* + * decap.c : IPSec tunnel support + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> +#include <vnet/interface.h> + +#include <vnet/ipsec/ipsec.h> + +u8 * +format_ipsec_policy_action (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + char * t = 0; + + switch (i) + { +#define _(v,f,str) case IPSEC_POLICY_ACTION_##f: t = str; break; + foreach_ipsec_policy_action +#undef _ + default: + s = format (s, "unknown"); + } + s = format (s, "%s", t); + return s; +} + +uword +unformat_ipsec_policy_action (unformat_input_t * input, va_list * args) +{ + u32 * r = va_arg (*args, u32 *); + + if (0) ; +#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_POLICY_ACTION_##f; + foreach_ipsec_policy_action +#undef _ + else + return 0; + return 1; +} + +u8 * +format_ipsec_crypto_alg (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 * t = 0; + + switch (i) + { +#define _(v,f,str) case IPSEC_CRYPTO_ALG_##f: t = (u8 *) str; break; + foreach_ipsec_crypto_alg +#undef _ + default: + s = format (s, "unknown"); + } + s = format (s, "%s", t); + return s; +} + +uword +unformat_ipsec_crypto_alg (unformat_input_t * input, va_list * args) +{ + u32 * r = va_arg (*args, u32 *); + + if (0) ; +#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_CRYPTO_ALG_##f; + foreach_ipsec_crypto_alg +#undef _ + else + return 0; + return 1; +} + +u8 * +format_ipsec_integ_alg (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 * t = 0; + + switch (i) + { +#define _(v,f,str) case IPSEC_INTEG_ALG_##f: t = (u8 *) str; break; + foreach_ipsec_integ_alg +#undef _ + default: + s = format (s, "unknown"); + } + s = format (s, "%s", t); + return s; +} + +uword +unformat_ipsec_integ_alg (unformat_input_t * input, va_list * args) +{ + u32 * r = va_arg (*args, u32 *); + + if (0) ; +#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_INTEG_ALG_##f; + foreach_ipsec_integ_alg +#undef _ + else + return 0; + return 1; +} + +u8 * +format_ipsec_replay_window(u8 * s, va_list * args) +{ + u64 w = va_arg (*args, u64); + u8 i; + + for (i = 0; i < 64; i++) + { + s = format (s, "%u", w & (1ULL<<i) ? 1 : 0); + } + + return s; +} diff --git a/vnet/vnet/ipsec/ipsec_if.c b/vnet/vnet/ipsec/ipsec_if.c new file mode 100644 index 00000000000..f4c535840d2 --- /dev/null +++ b/vnet/vnet/ipsec/ipsec_if.c @@ -0,0 +1,199 @@ +/* + * ipsec_if.c : IPSec interface support + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> + +#include <vnet/ipsec/ipsec.h> + +static u8 * format_ipsec_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "ipsec%d", dev_instance); +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (ipsec_device_class,static) = { + .name = "IPSec", + .format_device_name = format_ipsec_name, + .format_tx_trace = format_ipsec_if_output_trace, + .tx_function = dummy_interface_tx, +}; + +VNET_HW_INTERFACE_CLASS (ipsec_hw_class) = { + .name = "IPSec", +}; + +u32 +ipsec_add_del_tunnel_if (vnet_main_t * vnm, ipsec_add_del_tunnel_args_t * args) +{ + ipsec_tunnel_if_t * t; + ipsec_main_t * im = &ipsec_main; + vnet_hw_interface_t * hi; + u32 hw_if_index = ~0; + uword *p; + ipsec_sa_t * sa; + + u64 key = (u64) args->remote_ip.as_u32 << 32 | (u64) args->remote_spi; + p = hash_get (im->ipsec_if_pool_index_by_key, key); + + if (args->is_add) + { + /* check if same src/dst pair exists */ + if (p) + return VNET_API_ERROR_INVALID_VALUE; + + pool_get_aligned (im->tunnel_interfaces, t, CLIB_CACHE_LINE_BYTES); + memset (t, 0, sizeof (*t)); + + pool_get (im->sad, sa); + memset (sa, 0, sizeof (*sa)); + t->input_sa_index = sa - im->sad; + sa->spi = args->remote_spi; + sa->tunnel_src_addr.ip4.as_u32 = args->remote_ip.as_u32; + sa->tunnel_dst_addr.ip4.as_u32 = args->local_ip.as_u32; + sa->is_tunnel = 1; + sa->use_esn = args->esn; + sa->use_anti_replay = args->anti_replay; + + pool_get (im->sad, sa); + memset (sa, 0, sizeof (*sa)); + t->output_sa_index = sa - im->sad; + sa->spi = args->local_spi; + sa->tunnel_src_addr.ip4.as_u32 = args->local_ip.as_u32; + sa->tunnel_dst_addr.ip4.as_u32 = args->remote_ip.as_u32; + sa->is_tunnel = 1; + sa->seq = 1; + sa->use_esn = args->esn; + sa->use_anti_replay = args->anti_replay; + + hash_set (im->ipsec_if_pool_index_by_key, key, t - im->tunnel_interfaces); + + if (vec_len (im->free_tunnel_if_indices) > 0) + { + hw_if_index = + im->free_tunnel_if_indices[vec_len(im->free_tunnel_if_indices)-1]; + _vec_len (im->free_tunnel_if_indices) -= 1; + } + else + { + hw_if_index = vnet_register_interface(vnm, ipsec_device_class.index, + t - im->tunnel_interfaces, + ipsec_hw_class.index, + t - im->tunnel_interfaces); + + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->output_node_index = ipsec_if_output_node.index; + } + t->hw_if_index = hw_if_index; + + /*1st interface, register protocol */ + if (pool_elts(im->tunnel_interfaces) == 1) + ip4_register_protocol(IP_PROTOCOL_IPSEC_ESP, ipsec_if_input_node.index); + + return hw_if_index; + } + else + { + /* check if exists */ + if (!p) + return VNET_API_ERROR_INVALID_VALUE; + + t = pool_elt_at_index(im->tunnel_interfaces, p[0]); + hi = vnet_get_hw_interface (vnm, t->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, 0); /* admin down */ + vec_add1 (im->free_tunnel_if_indices, t->hw_if_index); + + /* delete input and output SA */ + sa = pool_elt_at_index(im->sad, t->input_sa_index); + pool_put (im->sad, sa); + sa = pool_elt_at_index(im->sad, t->output_sa_index); + pool_put (im->sad, sa); + + hash_unset (im->ipsec_if_pool_index_by_key, key); + pool_put (im->tunnel_interfaces, t); + } + return 0; +} + +int +ipsec_set_interface_key(vnet_main_t * vnm, u32 hw_if_index, + ipsec_if_set_key_type_t type, u8 alg, u8 * key) +{ + ipsec_main_t * im = 
&ipsec_main; + vnet_hw_interface_t * hi; + ipsec_tunnel_if_t * t; + ipsec_sa_t * sa; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + t = pool_elt_at_index (im->tunnel_interfaces, hi->dev_instance); + + if (type == IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO) + { + sa = pool_elt_at_index(im->sad, t->output_sa_index); + sa->crypto_alg = alg; + sa->crypto_key_len = vec_len(key); + memcpy(sa->crypto_key, key, vec_len(key)); + } + else if (type == IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG) + { + sa = pool_elt_at_index(im->sad, t->output_sa_index); + sa->integ_alg = alg; + sa->integ_key_len = vec_len(key); + memcpy(sa->integ_key, key, vec_len(key)); + } + else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO) + { + sa = pool_elt_at_index(im->sad, t->input_sa_index); + sa->crypto_alg = alg; + sa->crypto_key_len = vec_len(key); + memcpy(sa->crypto_key, key, vec_len(key)); + } + else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG) + { + sa = pool_elt_at_index(im->sad, t->input_sa_index); + sa->integ_alg = alg; + sa->integ_key_len = vec_len(key); + memcpy(sa->integ_key, key, vec_len(key)); + } + else + return VNET_API_ERROR_INVALID_VALUE; + + return 0; +} + + +clib_error_t * +ipsec_tunnel_if_init (vlib_main_t * vm) +{ + ipsec_main_t * im = &ipsec_main; + + im->ipsec_if_pool_index_by_key = hash_create (0, sizeof (uword)); + + return 0; +} + +VLIB_INIT_FUNCTION (ipsec_tunnel_if_init); + diff --git a/vnet/vnet/ipsec/ipsec_if_in.c b/vnet/vnet/ipsec/ipsec_if_in.c new file mode 100644 index 00000000000..517f8bff7b2 --- /dev/null +++ b/vnet/vnet/ipsec/ipsec_if_in.c @@ -0,0 +1,151 @@ +/* + * ipsec_if_in.c : IPSec interface input node + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
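/*
 * Illustrative control-plane use of ipsec_add_del_tunnel_if() and
 * ipsec_set_interface_key() above. This sketch is not part of the patch and
 * my_create_tunnel with its parameters is hypothetical. Note that the key is
 * passed as a vppinfra vector, since ipsec_set_interface_key() derives the
 * key length with vec_len().
 */
static u32
my_create_tunnel (vnet_main_t * vnm,
                  ip4_address_t local_ip, ip4_address_t remote_ip,
                  u32 local_spi, u32 remote_spi,
                  u8 crypto_alg, u8 * crypto_key_vec)
{
  ipsec_add_del_tunnel_args_t a;
  u32 hw_if_index;

  memset (&a, 0, sizeof (a));
  a.is_add = 1;
  a.local_ip.as_u32 = local_ip.as_u32;
  a.remote_ip.as_u32 = remote_ip.as_u32;
  a.local_spi = local_spi;      /* SPI placed in ESP packets we transmit */
  a.remote_spi = remote_spi;    /* SPI expected on ESP packets we receive */
  a.anti_replay = 1;

  hw_if_index = ipsec_add_del_tunnel_if (vnm, &a);
  if (hw_if_index == (u32) VNET_API_ERROR_INVALID_VALUE)
    return ~0;                  /* same remote ip/spi pair already exists */

  /* key used for traffic transmitted on the tunnel */
  ipsec_set_interface_key (vnm, hw_if_index,
                           IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO,
                           crypto_alg, crypto_key_vec);
  return hw_if_index;
}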
+ */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> + +#include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/esp.h> + +/* Statistics (not really errors) */ +#define foreach_ipsec_if_input_error \ +_(RX, "good packets received") + +static char * ipsec_if_input_error_strings[] = { +#define _(sym,string) string, + foreach_ipsec_if_input_error +#undef _ +}; + +typedef enum { +#define _(sym,str) IPSEC_IF_INPUT_ERROR_##sym, + foreach_ipsec_if_input_error +#undef _ + IPSEC_IF_INPUT_N_ERROR, +} ipsec_if_input_error_t; + +typedef enum { + IPSEC_IF_INPUT_NEXT_ESP_DECRYPT, + IPSEC_IF_INPUT_NEXT_DROP, + IPSEC_IF_INPUT_N_NEXT, +} ipsec_if_input_next_t; + +typedef struct { + u32 spi; + u32 seq; +} ipsec_if_input_trace_t; + + +u8 * format_ipsec_if_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ipsec_if_input_trace_t * t + = va_arg (*args, ipsec_if_input_trace_t *); + + s = format (s, "IPSec: spi %u seq %u", t->spi, t->seq); + return s; +} + +static uword +ipsec_if_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ipsec_main_t *im = &ipsec_main; + u32 * from, * to_next = 0, next_index; + u32 n_left_from; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t * b0; + ip4_header_t *ip0; + esp_header_t *esp0; + uword * p; + + bi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); + + next0 = IPSEC_IF_INPUT_NEXT_DROP; + + u64 key = (u64) ip0->src_address.as_u32 << 32 | + (u64) clib_net_to_host_u32(esp0->spi); + + p = hash_get (im->ipsec_if_pool_index_by_key, key); + + if (p) + { + ipsec_tunnel_if_t * t; + t = pool_elt_at_index(im->tunnel_interfaces, p[0]); + vnet_buffer(b0)->output_features.ipsec_sad_index = t->input_sa_index; + vlib_buffer_advance(b0, ip4_header_bytes (ip0)); + next0 = IPSEC_IF_INPUT_NEXT_ESP_DECRYPT; + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { + ipsec_if_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->spi = clib_host_to_net_u32(esp0->spi); + tr->seq = clib_host_to_net_u32(esp0->seq); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ipsec_if_input_node.index, + IPSEC_IF_INPUT_ERROR_RX, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (ipsec_if_input_node) = { + .function = ipsec_if_input_node_fn, + .name = "ipsec-if-input", + .vector_size = sizeof (u32), + .format_trace = format_ipsec_if_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ipsec_if_input_error_strings), + .error_strings = ipsec_if_input_error_strings, + + .n_next_nodes = IPSEC_IF_INPUT_N_NEXT, + + .next_nodes = { + [IPSEC_IF_INPUT_NEXT_ESP_DECRYPT] = "esp-decrypt", + [IPSEC_IF_INPUT_NEXT_DROP] = "error-drop", + }, +};
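Both ipsec_add_del_tunnel_if() and the ipsec-if-input node above key the tunnel hash on the same 64-bit value: the remote peer's outer IPv4 address (in the byte order it appears in the header) in the upper 32 bits and the host-order ESP SPI in the lower 32 bits, so a received packet can be matched to its tunnel interface without consulting the SPD. A minimal sketch of that packing, using hypothetical helper names that are not part of the patch:

/* Hypothetical helpers showing the layout of the tunnel hash key. */
static inline u64
ipsec_if_tunnel_key (u32 remote_ip4_as_u32, u32 spi_host_order)
{
  return ((u64) remote_ip4_as_u32 << 32) | (u64) spi_host_order;
}

static inline u32
ipsec_if_tunnel_key_ip (u64 key)    /* outer IPv4 source of the peer */
{
  return (u32) (key >> 32);
}

static inline u32
ipsec_if_tunnel_key_spi (u64 key)   /* ESP SPI, host byte order */
{
  return (u32) (key & 0xffffffff);
}

The control plane stores the tunnel pool index under this key when the interface is created; the data plane rebuilds the key from ip0->src_address and clib_net_to_host_u32(esp0->spi) and retrieves the tunnel with hash_get().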
\ No newline at end of file diff --git a/vnet/vnet/ipsec/ipsec_if_out.c b/vnet/vnet/ipsec/ipsec_if_out.c new file mode 100644 index 00000000000..1e1dd52854b --- /dev/null +++ b/vnet/vnet/ipsec/ipsec_if_out.c @@ -0,0 +1,140 @@ +/* + * ipsec_if_out.c : IPSec interface output node + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> + +#include <vnet/ipsec/ipsec.h> + + +/* Statistics (not really errors) */ +#define foreach_ipsec_if_output_error \ +_(TX, "good packets transmitted") + +static char * ipsec_if_output_error_strings[] = { +#define _(sym,string) string, + foreach_ipsec_if_output_error +#undef _ +}; + +typedef enum { +#define _(sym,str) IPSEC_IF_OUTPUT_ERROR_##sym, + foreach_ipsec_if_output_error +#undef _ + IPSEC_IF_OUTPUT_N_ERROR, +} ipsec_if_output_error_t; + +typedef enum { + IPSEC_IF_OUTPUT_NEXT_ESP_ENCRYPT, + IPSEC_IF_OUTPUT_NEXT_DROP, + IPSEC_IF_OUTPUT_N_NEXT, +} ipsec_if_output_next_t; + +typedef struct { + u32 spi; + u32 seq; +} ipsec_if_output_trace_t; + + +u8 * format_ipsec_if_output_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ipsec_if_output_trace_t * t + = va_arg (*args, ipsec_if_output_trace_t *); + + s = format (s, "IPSec: spi %u seq %u", t->spi, t->seq); + return s; +} + +static uword +ipsec_if_output_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ipsec_main_t *im = &ipsec_main; + vnet_main_t * vnm = im->vnet_main; + u32 * from, * to_next = 0, next_index; + u32 n_left_from, sw_if_index0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t * b0; + ipsec_tunnel_if_t * t0; + vnet_hw_interface_t * hi0; + + bi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + t0 = pool_elt_at_index (im->tunnel_interfaces, hi0->dev_instance); + vnet_buffer(b0)->output_features.ipsec_sad_index = t0->output_sa_index; + next0 = IPSEC_IF_OUTPUT_NEXT_ESP_ENCRYPT; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { + ipsec_if_output_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + ipsec_sa_t * sa0 = pool_elt_at_index(im->sad, t0->output_sa_index); + tr->spi = sa0->spi; + tr->seq = sa0->seq; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, 
ipsec_if_output_node.index, + IPSEC_IF_OUTPUT_ERROR_TX, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (ipsec_if_output_node) = { + .function = ipsec_if_output_node_fn, + .name = "ipsec-if-output", + .vector_size = sizeof (u32), + .format_trace = format_ipsec_if_output_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ipsec_if_output_error_strings), + .error_strings = ipsec_if_output_error_strings, + + .n_next_nodes = IPSEC_IF_OUTPUT_N_NEXT, + + .next_nodes = { + [IPSEC_IF_OUTPUT_NEXT_ESP_ENCRYPT] = "esp-encrypt", + [IPSEC_IF_OUTPUT_NEXT_DROP] = "error-drop", + }, +}; + diff --git a/vnet/vnet/ipsec/ipsec_input.c b/vnet/vnet/ipsec/ipsec_input.c new file mode 100644 index 00000000000..abb4a47485a --- /dev/null +++ b/vnet/vnet/ipsec/ipsec_input.c @@ -0,0 +1,406 @@ +/* + * decap.c : IPSec tunnel decapsulation + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> + +#include <vnet/ipsec/ipsec.h> +#include <vnet/ipsec/esp.h> + +#define foreach_ipsec_input_next \ +_(DROP, "error-drop") \ +_(ESP_DECRYPT, "esp-decrypt") + +#define _(v, s) IPSEC_INPUT_NEXT_##v, +typedef enum { + foreach_ipsec_input_next +#undef _ + IPSEC_INPUT_N_NEXT, +} ipsec_input_next_t; + + +#define foreach_ipsec_input_error \ + _(RX_PKTS, "IPSEC pkts received") \ + _(DECRYPTION_FAILED, "IPSEC decryption failed") + + +typedef enum { +#define _(sym,str) IPSEC_INPUT_ERROR_##sym, + foreach_ipsec_input_error +#undef _ + IPSEC_INPUT_N_ERROR, +} ipsec_input_error_t; + +static char * ipsec_input_error_strings[] = { +#define _(sym,string) string, + foreach_ipsec_input_error +#undef _ +}; + +vlib_node_registration_t ipsec_input_node; + +typedef struct { + u32 tunnel_index; + u32 spi; + u32 seq; +} ipsec_input_trace_t; + +/* packet trace format function */ +static u8 * format_ipsec_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ipsec_input_trace_t * t = va_arg (*args, ipsec_input_trace_t *); + + if (t->tunnel_index != ~0) + { + s = format (s, "esp: tunnel %u spi %u seq %u", t->tunnel_index, t->spi, t->seq); + } + else + { + s = format (s, "esp: no tunnel spi %u seq %u",t->spi, t->seq); + } + return s; +} + +always_inline ipsec_policy_t * +ipsec_input_protect_policy_match(ipsec_spd_t * spd, u32 sa, u32 da, u32 spi) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_policy_t * p; + ipsec_sa_t * s; + u32 * i; + + vec_foreach(i, spd->ipv4_inbound_protect_policy_indices) + { + p = pool_elt_at_index(spd->policies, *i); + s = pool_elt_at_index(im->sad, p->sa_index); + + if (spi != s->spi) + continue; + + if (s->is_tunnel) + { + if (da != clib_net_to_host_u32(s->tunnel_dst_addr.ip4.as_u32)) + continue; + + if (sa != clib_net_to_host_u32(s->tunnel_src_addr.ip4.as_u32)) + continue; + + return p; + } + + if (da < 
clib_net_to_host_u32(p->laddr.start.ip4.as_u32)) + continue; + + if (da > clib_net_to_host_u32(p->laddr.stop.ip4.as_u32)) + continue; + + if (sa < clib_net_to_host_u32(p->raddr.start.ip4.as_u32)) + continue; + + if (sa > clib_net_to_host_u32(p->raddr.stop.ip4.as_u32)) + continue; + + return p; + } + return 0; +} + +always_inline uword +ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la, ip6_address_t * ua) +{ + if ((memcmp(a->as_u64, la->as_u64, 2 * sizeof(u64)) >= 0) && + (memcmp(a->as_u64, ua->as_u64, 2 * sizeof(u64)) <= 0)) + return 1; + return 0; +} + +always_inline ipsec_policy_t * +ipsec_input_ip6_protect_policy_match (ipsec_spd_t * spd, + ip6_address_t * sa, + ip6_address_t * da, + u32 spi) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_policy_t * p; + ipsec_sa_t * s; + u32 * i; + + vec_foreach(i, spd->ipv6_inbound_protect_policy_indices) + { + p = pool_elt_at_index(spd->policies, *i); + s = pool_elt_at_index(im->sad, p->sa_index); + + if (spi != s->spi) + continue; + + if (s->is_tunnel) + { + if (!ip6_address_is_equal(sa, &s->tunnel_src_addr.ip6)) + continue; + + if (!ip6_address_is_equal(da, &s->tunnel_dst_addr.ip6)) + continue; + + return p; + } + + if (!ip6_addr_match_range(sa, &p->raddr.start.ip6, &p->raddr.stop.ip6)) + continue; + + if (!ip6_addr_match_range(da, &p->laddr.start.ip6, &p->laddr.stop.ip6)) + continue; + + return p; + } + return 0; +} + +static uword +ipsec_input_ip4_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip4_main_t * i4m = &ip4_main; + ip_lookup_main_t * lm = &i4m->lookup_main; + ip_config_main_t * cm = &lm->rx_config_mains[VNET_UNICAST]; + u32 n_left_from, *from, next_index, *to_next; + ipsec_main_t *im = &ipsec_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t * b0; + ip4_header_t *ip0; + esp_header_t *esp0; + ip4_ipsec_config_t * c0; + u32 tunnel_index0 = ~0; + ipsec_spd_t * spd0; + + bi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + c0 = vnet_get_config_data (&cm->config_main, + &vnet_buffer (b0)->ip.current_config_index, + &next0, sizeof (c0[0])); + + spd0 = pool_elt_at_index(im->spds, c0->spd_index); + + ip0 = vlib_buffer_get_current (b0); + esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); + + if (PREDICT_TRUE(ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) + { +#if 0 + clib_warning("packet received from %U to %U spi %u size %u spd_id %u", + format_ip4_address, ip0->src_address.as_u8, + format_ip4_address, ip0->dst_address.as_u8, + clib_net_to_host_u32(esp0->spi), + clib_net_to_host_u16(ip0->length), + spd0->id); +#endif + ipsec_policy_t * p0; + p0 = ipsec_input_protect_policy_match(spd0, + clib_net_to_host_u32(ip0->src_address.as_u32), + clib_net_to_host_u32(ip0->dst_address.as_u32), + clib_net_to_host_u32(esp0->spi)); + + if (PREDICT_TRUE(p0 != 0)) + { + p0->counter.packets++; + p0->counter.bytes += clib_net_to_host_u16(ip0->length); + vnet_buffer(b0)->output_features.ipsec_sad_index = p0->sa_index; + next0 = IPSEC_INPUT_NEXT_ESP_DECRYPT; + vlib_buffer_advance(b0, ip4_header_bytes (ip0)); + goto trace0; + } + } + + /* FIXME bypass and discard */ + +trace0: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
{ + ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->tunnel_index = tunnel_index0; + tr->spi = clib_host_to_net_u32(esp0->spi); + tr->seq = clib_host_to_net_u32(esp0->seq); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, ipsec_input_ip4_node.index, + IPSEC_INPUT_ERROR_RX_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + + +VLIB_REGISTER_NODE (ipsec_input_ip4_node) = { + .function = ipsec_input_ip4_node_fn, + .name = "ipsec-input-ip4", + .vector_size = sizeof (u32), + .format_trace = format_ipsec_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ipsec_input_error_strings), + .error_strings = ipsec_input_error_strings, + + .n_next_nodes = IPSEC_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [IPSEC_INPUT_NEXT_##s] = n, + foreach_ipsec_input_next +#undef _ + }, +}; + + +static uword +ipsec_input_ip6_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_main_t * i6m = &ip6_main; + ip_lookup_main_t * lm = &i6m->lookup_main; + ip_config_main_t * cm = &lm->rx_config_mains[VNET_UNICAST]; + u32 n_left_from, *from, next_index, *to_next; + ipsec_main_t *im = &ipsec_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t * b0; + ip6_header_t *ip0; + esp_header_t *esp0; + ip4_ipsec_config_t * c0; + u32 tunnel_index0 = ~0; + ipsec_spd_t * spd0; + u32 header_size = sizeof(ip0[0]); + + bi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + c0 = vnet_get_config_data (&cm->config_main, + &vnet_buffer (b0)->ip.current_config_index, + &next0, sizeof (c0[0])); + + spd0 = pool_elt_at_index(im->spds, c0->spd_index); + + ip0 = vlib_buffer_get_current (b0); + esp0 = (esp_header_t *) ((u8 *) ip0 + header_size); + + if (PREDICT_TRUE(ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) + { +#if 0 + clib_warning("packet received from %U to %U spi %u size %u spd_id %u", + format_ip6_address, &ip0->src_address, + format_ip6_address, &ip0->dst_address, + clib_net_to_host_u32(esp0->spi), + clib_net_to_host_u16(ip0->payload_length) + header_size, + spd0->id); +#endif + ipsec_policy_t * p0; + p0 = ipsec_input_ip6_protect_policy_match(spd0, + &ip0->src_address, + &ip0->dst_address, + clib_net_to_host_u32(esp0->spi)); + + if (PREDICT_TRUE(p0 != 0)) + { + p0->counter.packets++; + p0->counter.bytes += clib_net_to_host_u16(ip0->payload_length); + p0->counter.bytes += header_size; + vnet_buffer(b0)->output_features.ipsec_sad_index = p0->sa_index; + next0 = IPSEC_INPUT_NEXT_ESP_DECRYPT; + vlib_buffer_advance(b0, header_size); + goto trace0; + } + } + +trace0: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { + ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->tunnel_index = tunnel_index0; + tr->spi = clib_host_to_net_u32(esp0->spi); + tr->seq = clib_host_to_net_u32(esp0->seq); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + 
vlib_node_increment_counter (vm, ipsec_input_ip6_node.index, + IPSEC_INPUT_ERROR_RX_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + + +VLIB_REGISTER_NODE (ipsec_input_ip6_node) = { + .function = ipsec_input_ip6_node_fn, + .name = "ipsec-input-ip6", + .vector_size = sizeof (u32), + .format_trace = format_ipsec_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ipsec_input_error_strings), + .error_strings = ipsec_input_error_strings, + + .n_next_nodes = IPSEC_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [IPSEC_INPUT_NEXT_##s] = n, + foreach_ipsec_input_next +#undef _ + }, +}; diff --git a/vnet/vnet/ipsec/ipsec_output.c b/vnet/vnet/ipsec/ipsec_output.c new file mode 100644 index 00000000000..77b39fa9ee4 --- /dev/null +++ b/vnet/vnet/ipsec/ipsec_output.c @@ -0,0 +1,405 @@ +/* + * ipsec_output.c : IPSec output node + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> + +#include <vnet/ipsec/ipsec.h> + + +#define foreach_ipsec_output_next \ +_(DROP, "error-drop") \ +_(ESP_ENCRYPT, "esp-encrypt") + +#define _(v, s) IPSEC_OUTPUT_NEXT_##v, +typedef enum { + foreach_intf_output_feat + foreach_ipsec_output_next +#undef _ + IPSEC_OUTPUT_N_NEXT, +} ipsec_output_next_t; + + +#define foreach_ipsec_output_error \ + _(RX_PKTS, "IPSec pkts received") \ + _(POLICY_DISCARD, "IPSec policy discard") \ + _(POLICY_NO_MATCH, "IPSec policy (no match)") \ + _(POLICY_PROTECT, "IPSec policy protect") \ + _(POLICY_BYPASS, "IPSec policy bypass") \ + _(ENCAPS_FAILED, "IPSec encapsulation failed") + + +typedef enum { +#define _(sym,str) IPSEC_OUTPUT_ERROR_##sym, + foreach_ipsec_output_error +#undef _ + IPSEC_DECAP_N_ERROR, +} ipsec_output_error_t; + +static char * ipsec_output_error_strings[] = { +#define _(sym,string) string, + foreach_ipsec_output_error +#undef _ +}; + +vlib_node_registration_t ipsec_output_node; + +typedef struct { + u32 spd_id; +} ipsec_output_trace_t; + +/* packet trace format function */ +static u8 * format_ipsec_output_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ipsec_output_trace_t * t = va_arg (*args, ipsec_output_trace_t *); + + if (t->spd_id != ~0) + { + s = format (s, "spd %u ", t->spd_id); + } + else + { + s = format (s, "no spd"); + } + return s; +} + +always_inline intf_output_feat_t +get_next_intf_output_feature_and_reset_bit(vlib_buffer_t *b) +{ + u32 next_feature; + count_trailing_zeros(next_feature, vnet_buffer(b)->output_features.bitmap); + if (next_feature != INTF_OUTPUT_FEAT_DONE) + vnet_buffer(b)->output_features.bitmap &= ~(1 << next_feature); + return next_feature; +} + +always_inline ipsec_policy_t * +ipsec_output_policy_match(ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp, u16 rp) +{ + ipsec_policy_t * p; + u32 * i; + + vec_foreach(i, spd->ipv4_outbound_policies) + { 
+ p = pool_elt_at_index(spd->policies, *i); + if (PREDICT_FALSE(p->protocol && (p->protocol != pr))) + continue; + + if (la < clib_net_to_host_u32(p->laddr.start.ip4.as_u32)) + continue; + + if (la > clib_net_to_host_u32(p->laddr.stop.ip4.as_u32)) + continue; + + if (ra < clib_net_to_host_u32(p->raddr.start.ip4.as_u32)) + continue; + + if (ra > clib_net_to_host_u32(p->raddr.stop.ip4.as_u32)) + continue; + + if (PREDICT_FALSE((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP))) + return p; + + if (lp < p->lport.start) + continue; + + if (lp > p->lport.stop) + continue; + + if (rp < p->rport.start) + continue; + + if (rp > p->rport.stop) + continue; + + return p; + } + return 0; +} + +always_inline uword +ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la, ip6_address_t * ua) +{ + if ((memcmp(a->as_u64, la->as_u64, 2 * sizeof(u64)) >= 0) && + (memcmp(a->as_u64, ua->as_u64, 2 * sizeof(u64)) <= 0)) + return 1; + return 0; +} + +always_inline ipsec_policy_t * +ipsec_output_ip6_policy_match (ipsec_spd_t * spd, + ip6_address_t * sa, + ip6_address_t * da, + u16 lp, + u16 rp, + u8 pr) +{ + ipsec_policy_t * p; + u32 * i; + + vec_foreach(i, spd->ipv6_outbound_policies) + { + p = pool_elt_at_index(spd->policies, *i); + if (PREDICT_FALSE(p->protocol && (p->protocol != pr))) + continue; + + if (!ip6_addr_match_range(sa, &p->raddr.start.ip6, &p->raddr.stop.ip6)) + continue; + + if (!ip6_addr_match_range(da, &p->laddr.start.ip6, &p->laddr.stop.ip6)) + continue; + + if (PREDICT_FALSE((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP))) + return p; + + if (lp < p->lport.start) + continue; + + if (lp > p->lport.stop) + continue; + + if (rp < p->rport.start) + continue; + + if (rp > p->rport.stop) + continue; + + return p; + } + + return 0; +} +static uword +ipsec_output_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ipsec_main_t *im = &ipsec_main; + vnet_main_t * vnm = im->vnet_main; + + u32 * from, * to_next = 0; + u32 n_left_from, sw_if_index0, last_sw_if_index = (u32) ~0; + u32 next_node_index = (u32)~0, last_next_node_index = (u32) ~0; + vlib_frame_t *f = 0; + u32 spd_index0 = ~0; + ipsec_spd_t * spd0 = 0; + u64 nc_protect = 0, nc_bypass = 0, nc_discard = 0, nc_nomatch = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t * b0; + ipsec_policy_t * p0; + ip4_header_t * ip0; + ip6_header_t * ip6_0 = 0; + udp_header_t * udp0; + u8 is_ipv6 = 0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + + + ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + + sizeof(ethernet_header_t)); + + /* just forward non ipv4 packets */ + if (PREDICT_FALSE((ip0->ip_version_and_header_length & 0xF0 ) != 0x40)) + { + /* ipv6 packets */ + if (PREDICT_TRUE((ip0->ip_version_and_header_length & 0xF0 ) == 0x60)) + { + is_ipv6 = 1; + ip6_0 = (ip6_header_t *) ((u8 *) vlib_buffer_get_current (b0) + + sizeof(ethernet_header_t)); + } + else + { + next_node_index = get_next_output_feature_node_index(vnm, b0); + goto dispatch0; + } + } + + /* lookup for SPD only if sw_if_index is changed */ + if (PREDICT_FALSE(last_sw_if_index != sw_if_index0)) + { + uword * p = hash_get (im->spd_index_by_sw_if_index, sw_if_index0); + ASSERT(p); + spd_index0 = p[0]; + spd0 = pool_elt_at_index(im->spds, spd_index0); + last_sw_if_index = sw_if_index0; + } + + if (is_ipv6) + { + udp0 = ip6_next_header(ip6_0); +#if 0 + clib_warning("packet 
received from %U port %u to %U port %u spd_id %u", + format_ip6_address, &ip6_0->src_address, + clib_net_to_host_u16(udp0->src_port), + format_ip6_address, &ip6_0->dst_address, + clib_net_to_host_u16(udp0->dst_port), + spd0->id); +#endif + + p0 = ipsec_output_ip6_policy_match(spd0, + &ip6_0->src_address, + &ip6_0->dst_address, + clib_net_to_host_u16(udp0->src_port), + clib_net_to_host_u16(udp0->dst_port), + ip6_0->protocol); + } + else + { + udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); + +#if 0 + clib_warning("packet received from %U to %U port %u", + format_ip4_address, ip0->src_address.as_u8, + format_ip4_address, ip0->dst_address.as_u8, + clib_net_to_host_u16(udp0->dst_port)); + clib_warning("sw_if_index0 %u spd_index0 %u spd_id %u", + sw_if_index0, spd_index0, spd0->id); +#endif + + p0 = ipsec_output_policy_match(spd0, ip0->protocol, + clib_net_to_host_u32(ip0->src_address.as_u32), + clib_net_to_host_u32(ip0->dst_address.as_u32), + clib_net_to_host_u16(udp0->src_port), + clib_net_to_host_u16(udp0->dst_port)); + } + + if (PREDICT_TRUE(p0 != NULL)) + { + if (p0->policy == IPSEC_POLICY_ACTION_PROTECT) + { + nc_protect++; + next_node_index = im->esp_encrypt_node_index; + vnet_buffer(b0)->output_features.ipsec_sad_index = p0->sa_index; + vlib_buffer_advance(b0, sizeof(ethernet_header_t)); + p0->counter.packets++; + if (is_ipv6) + { + p0->counter.bytes += clib_net_to_host_u16(ip6_0->payload_length); + p0->counter.bytes += sizeof(ip6_header_t); + } + else + { + p0->counter.bytes += clib_net_to_host_u16(ip0->length); + } + } + else if (p0->policy == IPSEC_POLICY_ACTION_BYPASS) + { + nc_bypass++; + next_node_index = get_next_output_feature_node_index(vnm, b0); + p0->counter.packets++; + if (is_ipv6) + { + p0->counter.bytes += clib_net_to_host_u16(ip6_0->payload_length); + p0->counter.bytes += sizeof(ip6_header_t); + } + else + { + p0->counter.bytes += clib_net_to_host_u16(ip0->length); + } + } + else + { + nc_discard++; + p0->counter.packets++; + if (is_ipv6) + { + p0->counter.bytes += clib_net_to_host_u16(ip6_0->payload_length); + p0->counter.bytes += sizeof(ip6_header_t); + } + else + { + p0->counter.bytes += clib_net_to_host_u16(ip0->length); + } + next_node_index = im->error_drop_node_index; + } + } + else + { + nc_nomatch++; + next_node_index = im->error_drop_node_index; + } + +dispatch0: + from += 1; + n_left_from -= 1; + + if (PREDICT_FALSE((last_next_node_index != next_node_index))) + { + /* if this is not 1st frame */ + if (f) + vlib_put_frame_to_node (vm, last_next_node_index, f); + + last_next_node_index = next_node_index; + + f = vlib_get_frame_to_node(vm, next_node_index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next+=1; + f->n_vectors++; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { + ipsec_output_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + if (spd0) + tr->spd_id = spd0->id; + } + } + + vlib_put_frame_to_node (vm, next_node_index, f); + vlib_node_increment_counter (vm, ipsec_output_node.index, + IPSEC_OUTPUT_ERROR_POLICY_PROTECT, nc_protect); + vlib_node_increment_counter (vm, ipsec_output_node.index, + IPSEC_OUTPUT_ERROR_POLICY_BYPASS, nc_bypass); + vlib_node_increment_counter (vm, ipsec_output_node.index, + IPSEC_OUTPUT_ERROR_POLICY_DISCARD, nc_discard); + vlib_node_increment_counter (vm, ipsec_output_node.index, + IPSEC_OUTPUT_ERROR_POLICY_NO_MATCH, nc_nomatch); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (ipsec_output_node) = { + .function = ipsec_output_node_fn, + .name = 
"ipsec-output", + .vector_size = sizeof (u32), + .format_trace = format_ipsec_output_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(ipsec_output_error_strings), + .error_strings = ipsec_output_error_strings, + + .n_next_nodes = IPSEC_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [IPSEC_OUTPUT_NEXT_##s] = n, + foreach_intf_output_feat + foreach_ipsec_output_next +#undef _ + }, +}; diff --git a/vnet/vnet/l2/feat_bitmap.c b/vnet/vnet/l2/feat_bitmap.c new file mode 100644 index 00000000000..74917cda3ae --- /dev/null +++ b/vnet/vnet/l2/feat_bitmap.c @@ -0,0 +1,166 @@ +/* + * feat_bitmap.c: bitmap for managing feature invocation + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + + +// Drop node for feature bitmaps +// For features that just do a drop, or are not yet implemented. +// Initial feature dispatch nodes don't need to set b0->error +// in case of a possible drop because that will be done here. +// The next node is always error-drop. 
+ + +static vlib_node_registration_t feat_bitmap_drop_node; + +#define foreach_feat_bitmap_drop_error \ +_(NO_FWD, "L2 feature forwarding disabled") \ +_(NYI, "L2 feature not implemented") + +typedef enum { +#define _(sym,str) FEAT_BITMAP_DROP_ERROR_##sym, + foreach_feat_bitmap_drop_error +#undef _ + FEAT_BITMAP_DROP_N_ERROR, +} feat_bitmap_drop_error_t; + +static char * feat_bitmap_drop_error_strings[] = { +#define _(sym,string) string, + foreach_feat_bitmap_drop_error +#undef _ +}; + +typedef enum { + FEAT_BITMAP_DROP_NEXT_DROP, + FEAT_BITMAP_DROP_N_NEXT, +} feat_bitmap_drop_next_t; + +typedef struct { + u32 feature_bitmap; +} feat_bitmap_drop_trace_t; + +/* packet trace format function */ +static u8 * format_feat_bitmap_drop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + feat_bitmap_drop_trace_t * t = va_arg (*args, feat_bitmap_drop_trace_t *); + + s = format (s, "feat_bitmap_drop: feature bitmap 0x%08x", t->feature_bitmap); + return s; +} + +static uword +feat_bitmap_drop_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + feat_bitmap_drop_next_t next_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + feat_bitmap_drop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap; + } + + if (vnet_buffer(b0)->l2.feature_bitmap == 1) { + // If we are executing the last feature, this is the + // No forwarding catch-all + b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NO_FWD]; + } else { + b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NYI]; + } + next0 = FEAT_BITMAP_DROP_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +clib_error_t *feat_bitmap_drop_init (vlib_main_t *vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (feat_bitmap_drop_init); + +VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = { + .function = feat_bitmap_drop_node_fn, + .name = "feature-bitmap-drop", + .vector_size = sizeof (u32), + .format_trace = format_feat_bitmap_drop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(feat_bitmap_drop_error_strings), + .error_strings = feat_bitmap_drop_error_strings, + + .n_next_nodes = FEAT_BITMAP_DROP_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop", + }, +}; + + diff --git a/vnet/vnet/l2/feat_bitmap.h b/vnet/vnet/l2/feat_bitmap.h new file mode 100644 index 00000000000..7dd36a7712e --- /dev/null +++ b/vnet/vnet/l2/feat_bitmap.h @@ 
-0,0 +1,80 @@ +/* + * feat_bitmap.h: bitmap for managing feature invocation + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_feat_bitmap_h +#define included_vnet_l2_feat_bitmap_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +/* + * The feature bitmap is a way of organizing input and output feature graph nodes. + * The set of features to be executed are arranged in a bitmap with one bit per + * feature and each bit positioned in the same order that the features should be + * executed. Features can be dynamically removed from the set by masking off their + * corresponding bits. The bitmap is stored in packet context. Each feature clears + * its bit and then calls feat_bitmap_get_next_node_index() to go to the next + * graph node. + */ + + +// 32 features in a u32 bitmap +#define FEAT_MAX 32 + +// Initialize the feature next-node indexes of a graph node. +// Should be called by the init function of each feature graph node. +always_inline +void feat_bitmap_init_next_nodes ( + vlib_main_t * vm, + u32 node_index, // the current graph node index + u32 num_features, // number of entries in feat_names + char ** feat_names, // array of feature graph node names + u32 * next_nodes) // array of 32 next indexes to init +{ + u32 idx; + + ASSERT(num_features <= FEAT_MAX); + + for (idx=0; idx<num_features; idx++) { + if (vlib_get_node_by_name(vm, (u8 *) feat_names[idx])) { + next_nodes[idx] = + vlib_node_add_named_next(vm, node_index, feat_names[idx]); + } else { // Node may be in plugin which is not installed, use drop node + next_nodes[idx] = + vlib_node_add_named_next(vm, node_index, "feature-bitmap-drop"); + } + } + + // All unassigned bits go to the drop node + for (; idx<FEAT_MAX; idx++) { + next_nodes[idx] = vlib_node_add_named_next(vm, node_index, "feature-bitmap-drop"); + } +} + +// Return the graph node index for the feature corresponding to the +// first set bit in the bitmap. +always_inline +u32 feat_bitmap_get_next_node_index (u32 * next_nodes, u32 bitmap) +{ + u32 first_bit; + + count_leading_zeros(first_bit, bitmap); + first_bit = uword_bits - 1 - first_bit; + return next_nodes[first_bit]; +} + +#endif // included_vnet_l2_feat_bitmap_h diff --git a/vnet/vnet/l2/l2_bd.c b/vnet/vnet/l2/l2_bd.c new file mode 100644 index 00000000000..24f96d5749c --- /dev/null +++ b/vnet/vnet/l2/l2_bd.c @@ -0,0 +1,695 @@ +/* + * l2_bd.c : layer 2 bridge domain + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
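/*
 * Example of the dispatch pattern described in feat_bitmap.h above. This is
 * not part of the patch; MY_FEAT_BIT and my_feat_next_nodes are hypothetical.
 * A feature graph node clears its own bit and then jumps to the node that
 * owns the next remaining feature bit.
 */
#define MY_FEAT_BIT (1 << 4)             /* hypothetical bit position of this feature */

static u32 my_feat_next_nodes[FEAT_MAX]; /* filled once via feat_bitmap_init_next_nodes() */

static_always_inline u32
my_feat_get_next (vlib_buffer_t * b0)
{
  /* this feature is done with the packet: clear its bit ... */
  vnet_buffer (b0)->l2.feature_bitmap &= ~MY_FEAT_BIT;

  /* ... and dispatch to the node for the next remaining feature */
  return feat_bitmap_get_next_node_index (my_feat_next_nodes,
                                          vnet_buffer (b0)->l2.feature_bitmap);
}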
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vlib/cli.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/format.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_bd.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/vec.h> + +bd_main_t bd_main; + +// Init bridge domain if not done already +// For feature bitmap, set all bits except ARP termination +inline void +bd_validate (l2_bridge_domain_t * bd_config) +{ + if (!bd_is_valid (bd_config)) { + bd_config->feature_bitmap = ~L2INPUT_FEAT_ARP_TERM; + bd_config->bvi_sw_if_index = ~0; + bd_config->members = 0; + bd_config->mac_by_ip4 = 0; +// bd_config->mac_by_ip6 = hash_create_mem (0, sizeof(ip6_address_t), +// sizeof(uword)); + } +} + +u32 bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id) +{ + uword * p; + u32 rv; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) + return (p[0]); + + rv = clib_bitmap_first_clear (bdm->bd_index_bitmap); + + // mark this index busy + bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, rv, 1); + + hash_set (bdm->bd_index_by_bd_id, bd_id, rv); + + vec_validate (l2input_main.bd_configs, rv); + l2input_main.bd_configs[rv].bd_id = bd_id; + + return rv; +} + +int bd_delete_bd_index (bd_main_t * bdm, u32 bd_id) +{ + uword * p; + u32 bd_index; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + return -1; + + bd_index = p[0]; + + // mark this index clear + bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, bd_index, 0); + hash_unset (bdm->bd_index_by_bd_id, bd_id); + + l2input_main.bd_configs[bd_index].bd_id = ~0; + l2input_main.bd_configs[bd_index].feature_bitmap = 0; + + return 0; +} + +void +bd_add_member (l2_bridge_domain_t * bd_config, + l2_flood_member_t * member) +{ + // Add one element to the vector + + // When flooding, the bvi interface (if present) must be the last member + // processed due to how BVI processing can change the packet. To enable + // this order, we make the bvi interface the first in the vector and + // flooding walks the vector in reverse. + if ((member->flags == L2_FLOOD_MEMBER_NORMAL) || + (vec_len(bd_config->members) == 0)) { + vec_add1 (bd_config->members, *member); + + } else { + // Move 0th element to the end + vec_add1 (bd_config->members, bd_config->members[0]); + bd_config->members[0] = *member; + } +} + + +#define BD_REMOVE_ERROR_OK 0 +#define BD_REMOVE_ERROR_NOT_FOUND 1 + +u32 +bd_remove_member (l2_bridge_domain_t * bd_config, + u32 sw_if_index) +{ + u32 ix; + + // Find and delete the member + vec_foreach_index(ix, bd_config->members) { + if (vec_elt(bd_config->members, ix).sw_if_index == sw_if_index) { + vec_del1 (bd_config->members, ix); + return BD_REMOVE_ERROR_OK; + } + } + + return BD_REMOVE_ERROR_NOT_FOUND; +} + + +clib_error_t *l2bd_init (vlib_main_t *vm) +{ + bd_main_t *bdm = &bd_main; + u32 bd_index; + bdm->bd_index_by_bd_id = hash_create (0, sizeof(uword)); + // create a dummy bd with bd_id of 0 and bd_index of 0 with feature set + // to packet drop only. Thus, packets received from any L2 interface with + // uninitialized bd_index of 0 can be dropped safely. 
+ bd_index = bd_find_or_add_bd_index (bdm, 0); + ASSERT (bd_index == 0); + l2input_main.bd_configs[0].feature_bitmap = L2INPUT_FEAT_DROP; + return 0; +} + +VLIB_INIT_FUNCTION (l2bd_init); + + +// Set the learn/forward/flood flags for the bridge domain +// Return 0 if ok, non-zero if for an error. +u32 +bd_set_flags (vlib_main_t * vm, + u32 bd_index, + u32 flags, + u32 enable) { + + l2_bridge_domain_t * bd_config; + u32 feature_bitmap = 0; + + vec_validate (l2input_main.bd_configs, bd_index); + bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index); + + bd_validate (bd_config); + + if (flags & L2_LEARN) { + feature_bitmap |= L2INPUT_FEAT_LEARN; + } + if (flags & L2_FWD) { + feature_bitmap |= L2INPUT_FEAT_FWD; + } + if (flags & L2_FLOOD) { + feature_bitmap |= L2INPUT_FEAT_FLOOD; + } + if (flags & L2_UU_FLOOD) { + feature_bitmap |= L2INPUT_FEAT_UU_FLOOD; + } + if (flags & L2_ARP_TERM) { + feature_bitmap |= L2INPUT_FEAT_ARP_TERM; + } + + if (enable) { + bd_config->feature_bitmap |= feature_bitmap; + } else { + bd_config->feature_bitmap &= ~feature_bitmap; + } + + return 0; +} + +// set bridge-domain learn enable/disable +// The CLI format is: +// set bridge-domain learn <bd_id> [disable] +static clib_error_t * +bd_learn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_LEARN, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_learn_cli, static) = { + .path = "set bridge-domain learn", + .short_help = "set bridge-domain learn <bridge-domain-id> [disable]", + .function = bd_learn, +}; + +// set bridge-domain forward enable/disable +// The CLI format is: +// set bridge-domain forward <bd_index> [disable] +static clib_error_t * +bd_fwd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_FWD, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_fwd_cli, static) = { + .path = "set bridge-domain forward", + .short_help = "set bridge-domain forward <bridge-domain-id> [disable]", + .function = bd_fwd, +}; + +// set bridge-domain flood enable/disable +// The CLI format is: +// set bridge-domain flood <bd_index> [disable] +static clib_error_t * +bd_flood (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_FLOOD, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_flood_cli, static) = { + .path = "set bridge-domain flood", + .short_help = "set bridge-domain flood <bridge-domain-id> [disable]", + .function = bd_flood, +}; + +// set bridge-domain unkown-unicast flood enable/disable +// The CLI format is: +// set bridge-domain uu-flood <bd_index> [disable] +static clib_error_t * +bd_uu_flood (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_UU_FLOOD, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = { + .path = "set bridge-domain uu-flood", + .short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]", + .function = bd_uu_flood, +}; + +// set bridge-domain arp term enable/disable +// The CLI format is: +// set bridge-domain arp term <bridge-domain-id> [disable] +static clib_error_t * +bd_arp_term (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) bd_index = *p; + else return clib_error_return (0, "No such bridge domain %d", bd_id); + + enable = 1; + if (unformat (input, "disable")) enable = 0; + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_ARP_TERM, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + +done: + return error; +} + +VLIB_CLI_COMMAND (bd_arp_term_cli, static) = { + .path = "set bridge-domain arp term", + .short_help = "set bridge-domain arp term <bridge-domain-id> [disable]", + .function = bd_arp_term, +}; + + +// The clib hash implementation stores uword entries in the hash table. +// The hash table mac_by_ip4 is keyed via IP4 address and store the +// 6-byte MAC address directly in the hash table entry uword. +// This only works for 64-bit processor with 8-byte uword; which means +// this code *WILL NOT WORK* for a 32-bit prcessor with 4-byte uword. +u32 bd_add_del_ip_mac(u32 bd_index, + u8 *ip_addr, + u8 *mac_addr, + u8 is_ip6, + u8 is_add) +{ + l2input_main_t * l2im = &l2input_main; + l2_bridge_domain_t * bd_cfg = l2input_bd_config_from_index (l2im, bd_index); + u64 new_mac = *(u64 *) mac_addr; + u64 * old_mac; + u16 * mac16 = (u16 *) &new_mac; + + ASSERT (sizeof(uword) == sizeof(u64)); // make sure uword is 8 bytes + + mac16[3] = 0; // Clear last 2 unsed bytes of the 8-byte MAC address + if (is_ip6) { + // ip6_address_t ip6_addr = *(ip6_address_t *) ip_addr; + return 1; // not yet implemented + } else { + ip4_address_t ip4_addr = *(ip4_address_t *) ip_addr; + old_mac = (u64 *) hash_get (bd_cfg->mac_by_ip4, ip4_addr.as_u32); + if (is_add) { + if (old_mac && (*old_mac == new_mac)) return 0; // mac entry already exist + hash_set (bd_cfg->mac_by_ip4, ip4_addr.as_u32, new_mac); + } else { + if (old_mac && (*old_mac == new_mac)) { // mac entry match + hash_unset (bd_cfg->mac_by_ip4, ip4_addr.as_u32); // clear entry + } else { + return 1; + } + } + return 0; + } +} + +// set bridge-domain arp entry add/delete +// The CLI format is: +// set bridge-domain arp entry <bd-id> <ip-addr> <mac-addr> [del] +static clib_error_t * +bd_arp_entry (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u8 is_add = 1; + u8 is_ip6 = 0; + u8 ip_addr[16]; + u8 mac_addr[6]; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p) bd_index = *p; + else return clib_error_return (0, "No such bridge domain %d", bd_id); + + if (unformat (input, "%U", unformat_ip4_address, ip_addr)) { + is_ip6 = 0; + } else if (unformat (input, "%U", unformat_ip6_address, ip_addr)) { + is_ip6 = 1; + } else { + error = clib_error_return (0, "expecting IP address but got `%U'", + format_unformat_error, input); + goto done; + } + + if (!unformat(input, "%U", unformat_ethernet_address, mac_addr)) { + error = clib_error_return (0, "expecting MAC address but got `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "del")) { + is_add = 0; + } + + // Add or delete the IP-MAC entry in the bridge domain + if (bd_add_del_ip_mac(bd_index, ip_addr, mac_addr, is_ip6, is_add)) { + error = clib_error_return (0, "MAC %s for IP %U and MAC %U failed", + is_add ? "add" : "del", + format_ip4_address, ip_addr, + format_ethernet_address, mac_addr); + } + +done: + return error; +} + +VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = { + .path = "set bridge-domain arp entry", + .short_help = "set bridge-domain arp entry <bd-id> <ip-addr> <mac-addr> [del]", + .function = bd_arp_entry, +}; + +u8* format_vtr(u8 * s, va_list *args) +{ + u32 vtr_op = va_arg (*args, u32); + u32 dot1q = va_arg (*args, u32); + u32 tag1 = va_arg (*args, u32); + u32 tag2 = va_arg (*args, u32); + switch (vtr_op) { + case L2_VTR_DISABLED: + return format (s, "none"); + case L2_VTR_PUSH_1: + return format (s, "push-1 %s %d", dot1q? "dot1q":"dot1ad", tag1); + case L2_VTR_PUSH_2: + return format (s, "push-2 %s %d %d", dot1q? "dot1q":"dot1ad", tag1, tag2); + case L2_VTR_POP_1: + return format (s, "pop-1"); + case L2_VTR_POP_2: + return format (s, "pop-2"); + case L2_VTR_TRANSLATE_1_1: + return format (s, "trans-1-1 %s %d", dot1q? "dot1q":"dot1ad", tag1); + case L2_VTR_TRANSLATE_1_2: + return format (s, "trans-1-2 %s %d %d", dot1q? "dot1q":"dot1ad", tag1, tag2); + case L2_VTR_TRANSLATE_2_1: + return format (s, "trans-2-1 %s %d", dot1q? "dot1q":"dot1ad", tag1); + case L2_VTR_TRANSLATE_2_2: + return format (s, "trans-2-2 %s %d %d", dot1q? 
"dot1q":"dot1ad", tag1, tag2); + default: + return format (s, "none"); + } +} + +// show bridge-domain state +// The CLI format is: +// show bridge-domain [<bd_index>] +static clib_error_t * +bd_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index = ~0; + l2_bridge_domain_t * bd_config; + u32 start, end; + u32 printed; + u32 detail = 0; + u32 intf = 0; + u32 arp = 0; + u32 bd_id = ~0; + uword * p; + + start = 0; + end = vec_len(l2input_main.bd_configs); + + if (unformat (input, "%d", &bd_id)) { + if (unformat (input, "detail")) detail = 1; + else if (unformat (input, "det")) detail = 1; + if (unformat (input, "int")) intf = 1; + if (unformat (input, "arp")) arp = 1; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) bd_index = *p; + else return clib_error_return (0, "No such bridge domain %d", bd_id); + + vec_validate (l2input_main.bd_configs, bd_index); + bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index); + if (bd_is_valid (bd_config)) { + start = bd_index; + end = start + 1; + } else { + vlib_cli_output (vm, "bridge-domain %d not in use", bd_id); + goto done; + } + } + + // Show all bridge-domains that have been initialized + + printed = 0; + for (bd_index=start; bd_index<end; bd_index++) { + bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index); + if (bd_is_valid(bd_config)) { + if (!printed) { + printed = 1; + vlib_cli_output (vm, "%=5s %=7s %=10s %=10s %=10s %=10s %=10s %=14s", + "ID", + "Index", + "Learning", + "U-Forwrd", + "UU-Flood", + "Flooding", + "ARP-Term", + "BVI-Intf"); + } + + vlib_cli_output ( + vm, "%=5d %=7d %=10s %=10s %=10s %=10s %=10s %=14U", + bd_config->bd_id, bd_index, + bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ? "on" : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_FWD ? "on" : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD ? "on" : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD ? "on" : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM ? "on" : "off", + format_vnet_sw_if_index_name_with_NA, vnm, bd_config->bvi_sw_if_index); + + if (detail || intf) { + // Show all member interfaces + + l2_flood_member_t * member; + u32 header = 0; + + vec_foreach(member, bd_config->members) { + u32 vtr_opr, dot1q, tag1, tag2; + if (!header) { + header = 1; + vlib_cli_output (vm, "\n%=30s%=7s%=5s%=5s%=30s", + "Interface", "Index", "SHG", "BVI","VLAN-Tag-Rewrite"); + } + l2vtr_get(vm, vnm, member->sw_if_index, &vtr_opr, &dot1q, &tag1, &tag2); + vlib_cli_output (vm, "%=30U%=7d%=5d%=5s%=30U", + format_vnet_sw_if_index_name, vnm, member->sw_if_index, + member->sw_if_index, + member->shg, + member->flags & L2_FLOOD_MEMBER_BVI ? 
"*" : "-", + format_vtr, vtr_opr, dot1q, tag1, tag2); + } + } + + if ((detail || arp) && + (bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM)) { + u32 ip4_addr; + u64 mac_addr; + vlib_cli_output (vm, "\n IP4 to MAC table for ARP Termination"); + hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4, ({ + vlib_cli_output (vm, "%=20U => %=20U", + format_ip4_address, &ip4_addr, + format_ethernet_address, &mac_addr); + })); + } + } + } + + if (!printed) { + vlib_cli_output (vm, "no bridge-domains in use"); + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_show_cli, static) = { + .path = "show bridge-domain", + .short_help = "show bridge-domain [bridge-domain-id [detail|int|arp]]", + .function = bd_show, +}; diff --git a/vnet/vnet/l2/l2_bd.h b/vnet/vnet/l2/l2_bd.h new file mode 100644 index 00000000000..9d29a83b22f --- /dev/null +++ b/vnet/vnet/l2/l2_bd.h @@ -0,0 +1,120 @@ +/* + * l2_bd.h : layer 2 bridge domain + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_l2bd_h +#define included_l2bd_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +typedef struct { + // hash bd_id -> bd_index + uword * bd_index_by_bd_id; + + // Busy bd_index bitmap + uword * bd_index_bitmap; + + // convenience + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} bd_main_t; + +bd_main_t bd_main; + +// Bridge domain member + +#define L2_FLOOD_MEMBER_NORMAL 0 +#define L2_FLOOD_MEMBER_BVI 1 + +typedef struct { + u32 sw_if_index; // the output L2 interface + u8 flags; // 0=normal, 1=bvi + u8 shg; // split horizon group number + u16 spare; +} l2_flood_member_t; + + +// Per-bridge domain configuration + +typedef struct { + u32 feature_bitmap; + // Contains bit enables for flooding, learning, and forwarding. + // All other feature bits should always be set. 
+ + // identity of the bridge-domain's BVI interface + // set to ~0 if there is no BVI + u32 bvi_sw_if_index; + + // output node index for bvi interface before it was changed to l2-input + u32 saved_bvi_output_node_index; + + // bridge domain id, not to be confused with bd_index + u32 bd_id; + + // Vector of members in the replication group + l2_flood_member_t * members; + + // hash ip4/ip6 -> mac for arp termination + uword *mac_by_ip4; + uword *mac_by_ip6; + +} l2_bridge_domain_t; + +// Return 1 if bridge domain has been initialized +always_inline u32 +bd_is_valid (l2_bridge_domain_t * bd_config) +{ + return (bd_config->feature_bitmap != 0); +} + +// Init bridge domain if not done already +inline void +bd_validate (l2_bridge_domain_t * bd_config); + + +void +bd_add_member (l2_bridge_domain_t * bd_config, + l2_flood_member_t * member); + +u32 +bd_remove_member (l2_bridge_domain_t * bd_config, + u32 sw_if_index); + + +#define L2_LEARN (1<<0) +#define L2_FWD (1<<1) +#define L2_FLOOD (1<<2) +#define L2_UU_FLOOD (1<<3) +#define L2_ARP_TERM (1<<4) + +u32 +bd_set_flags (vlib_main_t * vm, + u32 bd_index, + u32 flags, + u32 enable); + +u32 bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id); +int bd_delete_bd_index (bd_main_t * bdm, u32 bd_id); + +u32 bd_add_del_ip_mac(u32 bd_index, + u8 *ip_addr, + u8 *mac_addr, + u8 is_ip6, + u8 is_add); + +#endif + diff --git a/vnet/vnet/l2/l2_bvi.c b/vnet/vnet/l2/l2_bvi.c new file mode 100644 index 00000000000..828e955617b --- /dev/null +++ b/vnet/vnet/l2/l2_bvi.c @@ -0,0 +1,35 @@ +/* + * l2_bvi.c : layer 2 Bridged Virtual Interface + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/l2_fwd.h> +#include <vnet/l2/l2_flood.h> +#include <vnet/l2/l2_bvi.h> + + +// Call the L2 nodes that need the ethertype mapping +void +l2bvi_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index) +{ + l2fwd_register_input_type (vm, type, node_index); + l2flood_register_input_type (vm, type, node_index); +} + + diff --git a/vnet/vnet/l2/l2_bvi.h b/vnet/vnet/l2/l2_bvi.h new file mode 100644 index 00000000000..ca5673373fb --- /dev/null +++ b/vnet/vnet/l2/l2_bvi.h @@ -0,0 +1,122 @@ +/* + * l2_bvi.h : layer 2 Bridged Virtual Interface + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
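[Editorial note, not part of the change set] The bridge-domain API declared above (bd_find_or_add_bd_index, bd_set_flags and the L2_* flag bits) is typically driven from control-plane code. A minimal sketch of that usage, assuming only the prototypes shown above and eliding error handling:

/* Sketch: enable ARP termination on a bridge domain identified by bd_id.
 * bd_find_or_add_bd_index() creates the bd_index if needed;
 * bd_set_flags() toggles one of the L2_* feature bits declared above. */
static void
example_enable_arp_term (vlib_main_t * vm, u32 bd_id)
{
  bd_main_t * bdm = &bd_main;
  u32 bd_index = bd_find_or_add_bd_index (bdm, bd_id);

  bd_set_flags (vm, bd_index, L2_ARP_TERM, 1 /* enable */);
}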
+ */ + +#ifndef included_l2bvi_h +#define included_l2bvi_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/sparse_vec.h> + +#include <vnet/l2/l2_input.h> + +#define TO_BVI_ERR_OK 0 +#define TO_BVI_ERR_TAGGED 1 +#define TO_BVI_ERR_ETHERTYPE 2 + +// Send a packet from L2 processing to L3 via the BVI interface. +// Set next0 to the proper L3 input node. +// Return an error if the packet isn't what we expect. + +static_always_inline u32 +l2_to_bvi (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + vlib_buffer_t * b0, + u32 bvi_sw_if_index, + next_by_ethertype_t * l3_next, + u32 * next0) +{ + u8 l2_len; + u16 ethertype; + u8 * l3h; + + // Save L2 header position which may be changed due to packet replication + vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data; + + // Strip L2 header + l2_len = vnet_buffer(b0)->l2.l2_len; + vlib_buffer_advance (b0, l2_len); + + l3h = vlib_buffer_get_current (b0); + ethertype = clib_net_to_host_u16(*(u16 *)(l3h - 2)); + + // Set the input interface to be the BVI interface + vnet_buffer(b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = ~0; + + // Go to appropriate L3 input node + if (ethertype == ETHERNET_TYPE_IP4) { + *next0 = l3_next->input_next_ip4; + } else if (ethertype == ETHERNET_TYPE_IP6) { + *next0 = l3_next->input_next_ip6; + } else { + // uncommon ethertype, check table + u32 i0; + + i0 = sparse_vec_index (l3_next->input_next_by_type, ethertype); + *next0 = vec_elt (l3_next->input_next_by_type, i0); + + if (i0 == SPARSE_VEC_INVALID_INDEX) { + return TO_BVI_ERR_ETHERTYPE; + } + } + + // increment BVI RX interface stat + vlib_increment_combined_counter + (vnet_main->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + vlib_main->cpu_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX], + 1, + vlib_buffer_length_in_chain (vlib_main, b0)); + return TO_BVI_ERR_OK; +} + + +// Prepare a packet that was sent to the BVI interface for L2 processing. + +static_always_inline void +bvi_to_l2 (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 cpu_index, + vlib_buffer_t * b0, + u32 bvi_sw_if_index) +{ + // Set the input interface to be the BVI interface + vnet_buffer(b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = ~0; + + // Update l2_len in packet which is expected by l2 path, + // including l2 tag push/pop code on output + vnet_update_l2_len(b0); + + // increment BVI TX interface stat + vlib_increment_combined_counter + (vnet_main->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + cpu_index, + bvi_sw_if_index, + 1, + vlib_buffer_length_in_chain (vlib_main, b0)); +} + + +void +l2bvi_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index); +#endif diff --git a/vnet/vnet/l2/l2_classify.c b/vnet/vnet/l2/l2_classify.c new file mode 100644 index 00000000000..a6c8ebbc1b4 --- /dev/null +++ b/vnet/vnet/l2/l2_classify.c @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
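[Editorial note, not part of the change set] Callers such as the flood node hand the last copy of a packet to L3 through l2_to_bvi() and must turn a non-OK return code into a drop disposition. A hedged sketch of that call pattern, using only the function and TO_BVI_ERR_* codes defined in this header; the wrapper name and drop_next parameter are illustrative:

/* Sketch: dispatch one buffer to L3 via the BVI interface, falling back
 * to the caller's drop next-index if the packet is not acceptable. */
static_always_inline u32
example_send_to_bvi (vlib_main_t * vm, vnet_main_t * vnm,
                     vlib_buffer_t * b0, u32 bvi_sw_if_index,
                     next_by_ethertype_t * l3_next, u32 drop_next)
{
  u32 next0 = drop_next;

  if (l2_to_bvi (vm, vnm, b0, bvi_sw_if_index, l3_next, &next0)
      != TO_BVI_ERR_OK)
    next0 = drop_next;          /* tagged or unknown-ethertype packet */

  return next0;
}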
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * l2_classify.c + */ + +#include <vnet/l2/l2_classify.h> +#include <vnet/api_errno.h> + +typedef struct { + /* per-pkt trace data */ + u32 sw_if_index; + u32 next_index; + u32 table_index; + u32 session_offset; +} l2_classify_trace_t; + +typedef struct { + vnet_classify_main_t * vcm; + l2_classify_main_t * l2cm; +} l2_classify_runtime_t; + +/* packet trace format function */ +static u8 * format_l2_classify_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_classify_trace_t * t = va_arg (*args, l2_classify_trace_t *); + + s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d", + t->sw_if_index, t->table_index, t->session_offset, t->next_index); + return s; +} + +l2_classify_main_t l2_classify_main; + +vlib_node_registration_t l2_classify_node; + +#define foreach_l2_classify_error \ +_(MISS, "Classify misses") \ +_(HIT, "Classify hits") \ +_(CHAIN_HIT, "Classify hits after chain walk") \ +_(DROP, "L2 Classify Drops") + +typedef enum { +#define _(sym,str) L2_CLASSIFY_ERROR_##sym, + foreach_l2_classify_error +#undef _ + L2_CLASSIFY_N_ERROR, +} l2_classify_error_t; + +static char * l2_classify_error_strings[] = { +#define _(sym,string) string, + foreach_l2_classify_error +#undef _ +}; + +static uword +l2_classify_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_classify_next_t next_index; + l2_classify_main_t * cm = &l2_classify_main; + vnet_classify_main_t * vcm = cm->vnet_classify_main; + l2_classify_runtime_t * rt = (l2_classify_runtime_t *)node->runtime_data; + u32 feature_bitmap; + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + f64 now; + + now = vlib_time_now(vm); + + n_left_from = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + /* First pass: compute hash */ + + while (n_left_from > 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, bi1; + ethernet_header_t * h0, * h1; + u32 sw_if_index0, sw_if_index1; + u16 type0, type1; + int type_index0, type_index1; + vnet_classify_table_t * t0, * t1; + u32 table_index0, table_index1; + u64 hash0, hash1; + + + /* prefetch next iteration */ + { + vlib_buffer_t * p1, * p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = vlib_buffer_get_current (b1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + vnet_buffer(b0)->l2_classify.table_index = ~0; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + vnet_buffer(b1)->l2_classify.table_index = ~0; + + /* Select classifier table based on ethertype */ + type0 = clib_net_to_host_u16 (h0->type); + type1 = clib_net_to_host_u16 (h1->type); + + type_index0 = (type0 == ETHERNET_TYPE_IP4) + ? L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER; + type_index0 = (type0 == ETHERNET_TYPE_IP6) + ? L2_CLASSIFY_TABLE_IP6 : type_index0; + + type_index1 = (type1 == ETHERNET_TYPE_IP4) + ? 
L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER; + type_index1 = (type1 == ETHERNET_TYPE_IP6) + ? L2_CLASSIFY_TABLE_IP6 : type_index1; + + vnet_buffer(b0)->l2_classify.table_index = + table_index0 = + rt->l2cm->classify_table_index_by_sw_if_index + [type_index0][sw_if_index0]; + + if (table_index0 != ~0) + { + t0 = pool_elt_at_index (vcm->tables, table_index0); + + vnet_buffer(b0)->l2_classify.hash = hash0 = + vnet_classify_hash_packet (t0, (u8 *) h0); + vnet_classify_prefetch_bucket (t0, hash0); + } + + vnet_buffer(b1)->l2_classify.table_index = + table_index1 = + rt->l2cm->classify_table_index_by_sw_if_index + [type_index1][sw_if_index1]; + + if (table_index1 != ~0) + { + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer(b1)->l2_classify.hash = hash1 = + vnet_classify_hash_packet (t1, (u8 *) h1); + vnet_classify_prefetch_bucket (t1, hash1); + } + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t * b0; + u32 bi0; + ethernet_header_t * h0; + u32 sw_if_index0; + u16 type0; + u32 type_index0; + vnet_classify_table_t * t0; + u32 table_index0; + u64 hash0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + vnet_buffer(b0)->l2_classify.table_index = ~0; + + /* Select classifier table based on ethertype */ + type0 = clib_net_to_host_u16 (h0->type); + + type_index0 = (type0 == ETHERNET_TYPE_IP4) + ? L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER; + type_index0 = (type0 == ETHERNET_TYPE_IP6) + ? L2_CLASSIFY_TABLE_IP6 : type_index0; + + vnet_buffer(b0)->l2_classify.table_index = + table_index0 = rt->l2cm->classify_table_index_by_sw_if_index + [type_index0][sw_if_index0]; + + if (table_index0 != ~0) + { + t0 = pool_elt_at_index (vcm->tables, table_index0); + + vnet_buffer(b0)->l2_classify.hash = hash0 = + vnet_classify_hash_packet (t0, (u8 *) h0); + vnet_classify_prefetch_bucket (t0, hash0); + } + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = L2_CLASSIFY_NEXT_ETHERNET_INPUT; + ethernet_header_t * h0; + u32 table_index0; + u64 hash0; + vnet_classify_table_t * t0; + vnet_classify_entry_t * e0; + + if (PREDICT_TRUE (n_left_from > 2)) + { + vlib_buffer_t * p2 = vlib_get_buffer(vm, from[2]); + u64 phash2; + u32 table_index2; + vnet_classify_table_t * tp2; + + /* + * Prefetch table entry two ahead. Buffer / data + * were prefetched above... 
+ */ + table_index2 = vnet_buffer(p2)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index2 != ~0)) + { + tp2 = pool_elt_at_index (vcm->tables, table_index2); + phash2 = vnet_buffer(p2)->l2_classify.hash; + vnet_classify_prefetch_entry (tp2, phash2); + } + } + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current(b0); + table_index0 = vnet_buffer(b0)->l2_classify.table_index; + e0 = 0; + + if (PREDICT_TRUE(table_index0 != ~0)) + { + hash0 = vnet_buffer(b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + + e0 = vnet_classify_find_entry (t0, (u8 *) h0, + hash0, now); + if (e0) + { + vnet_buffer(b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < L2_CLASSIFY_N_NEXT)? + e0->next_index:next0; + hits++; + } + else + { + while (1) + { + if (t0->next_table_index != ~0) + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + else + { + next0 = (t0->miss_next_index < L2_CLASSIFY_N_NEXT)? + t0->miss_next_index:next0; + misses++; + break; + } + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + if (e0) + { + vnet_buffer(b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < L2_CLASSIFY_N_NEXT)? + e0->next_index:next0; + hits++; + chain_hits++; + break; + } + } + } + } + + if (PREDICT_FALSE(next0 == 0)) + b0->error = node->errors[L2_CLASSIFY_ERROR_DROP]; + + if (PREDICT_FALSE (next0 == ~0)) + { + + // Remove ourself from the feature bitmap + feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap + & ~L2INPUT_FEAT_CLASSIFY; + + // save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap; + + // Determine the next node + next0 = feat_bitmap_get_next_node_index(cm->feat_next_node_index, + feature_bitmap); + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->table_index = table_index0; + t->next_index = next0; + t->session_offset = e0 ? 
vnet_classify_get_offset (t0, e0) : 0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + L2_CLASSIFY_ERROR_MISS, + misses); + vlib_node_increment_counter (vm, node->node_index, + L2_CLASSIFY_ERROR_HIT, + hits); + vlib_node_increment_counter (vm, node->node_index, + L2_CLASSIFY_ERROR_CHAIN_HIT, + chain_hits); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2_classify_node) = { + .function = l2_classify_node_fn, + .name = "l2-classify", + .vector_size = sizeof (u32), + .format_trace = format_l2_classify_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_classify_error_strings), + .error_strings = l2_classify_error_strings, + + .runtime_data_bytes = sizeof (l2_classify_runtime_t), + + .n_next_nodes = L2_CLASSIFY_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_CLASSIFY_NEXT_DROP] = "error-drop", + [L2_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2", + [L2_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input", + [L2_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input", + [L2_CLASSIFY_NEXT_LI] = "li-hit", + }, +}; + +clib_error_t *l2_classify_init (vlib_main_t *vm) +{ + l2_classify_main_t * cm = &l2_classify_main; + l2_classify_runtime_t * rt; + + rt = vlib_node_get_runtime_data (vm, l2_classify_node.index); + + cm->vlib_main = vm; + cm->vnet_main = vnet_get_main(); + cm->vnet_classify_main = &vnet_classify_main; + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_classify_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + cm->feat_next_node_index); + rt->l2cm = cm; + rt->vcm = cm->vnet_classify_main; + + return 0; +} + +VLIB_INIT_FUNCTION (l2_classify_init); + + +void vnet_l2_classify_enable_disable (u32 sw_if_index, + int enable_disable) +{ + vlib_main_t * vm = vlib_get_main(); + vnet_main_t * vnm = vnet_get_main(); + + if (enable_disable) + set_int_l2_mode (vm, vnm, MODE_L2_CLASSIFY, sw_if_index, + 0, 0, 0, 0); + else + set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index, + 0, 0, 0, 0); +} + +int vnet_l2_classify_set_tables (u32 sw_if_index, + u32 ip4_table_index, + u32 ip6_table_index, + u32 other_table_index) +{ + l2_classify_main_t * cm = &l2_classify_main; + vnet_classify_main_t * vcm = cm->vnet_classify_main; + + /* Assume that we've validated sw_if_index in the API layer */ + + if (ip4_table_index != ~0 && + pool_is_free_index (vcm->tables, ip4_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE; + + if (ip6_table_index != ~0 && + pool_is_free_index (vcm->tables, ip6_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE2; + + if (other_table_index != ~0 && + pool_is_free_index (vcm->tables, other_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE3; + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP4], + sw_if_index); + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP6], + sw_if_index); + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_OTHER], + sw_if_index); + + cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP4] + [sw_if_index] = ip4_table_index; + + cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP6] + [sw_if_index] = ip6_table_index; + + cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_OTHER] + [sw_if_index] = other_table_index; + + return 0; +} + +static 
clib_error_t * +int_l2_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 sw_if_index = ~0; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + u32 other_table_index = ~0; + int rv; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else if (unformat (input, "ip4-table %d", &ip4_table_index)) + ; + else if (unformat (input, "ip6-table %d", &ip6_table_index)) + ; + else if (unformat (input, "other-table %d", &other_table_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "interface must be specified"); + + + if (ip4_table_index == ~0 && ip6_table_index == ~0 + && other_table_index == ~0) + { + vlib_cli_output (vm, "L2 classification disabled"); + vnet_l2_classify_enable_disable (sw_if_index, 0 /* enable */); + return 0; + } + + rv = vnet_l2_classify_set_tables (sw_if_index, ip4_table_index, + ip6_table_index, other_table_index); + switch(rv) + { + case 0: + vnet_l2_classify_enable_disable (sw_if_index, 1 /* enable */); + break; + + default: + return clib_error_return (0, "vnet_l2_classify_set_tables: %d", + rv); + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (int_l2_classify_cli, static) = { + .path = "set interface l2 classify", + .short_help = + "set interface l2 classify intfc <int> [ip4-table <n>]\n" + " [ip6-table <n>] [other-table <n>]", + .function = int_l2_classify_command_fn, +}; + + diff --git a/vnet/vnet/l2/l2_classify.h b/vnet/vnet/l2/l2_classify.h new file mode 100644 index 00000000000..55c2fc8b00d --- /dev/null +++ b/vnet/vnet/l2/l2_classify.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
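[Editorial note, not part of the change set] The CLI handler below is a thin wrapper over the two exported calls defined above; a programmatic caller (for example an API handler) would use them the same way. A sketch under that assumption, with illustrative table indices (~0 means "no table for that protocol"):

/* Sketch: bind classifier tables to an interface and turn on L2
 * classification, mirroring "set interface l2 classify". */
static clib_error_t *
example_bind_classify_tables (u32 sw_if_index,
                              u32 ip4_table_index,
                              u32 ip6_table_index)
{
  int rv = vnet_l2_classify_set_tables (sw_if_index,
                                        ip4_table_index,
                                        ip6_table_index,
                                        ~0 /* other_table_index */);
  if (rv != 0)
    return clib_error_return (0, "vnet_l2_classify_set_tables: %d", rv);

  vnet_l2_classify_enable_disable (sw_if_index, 1 /* enable */);
  return 0;
}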
+ */ + +#ifndef __included_vnet_l2_classify_h__ +#define __included_vnet_l2_classify_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#include <vnet/classify/vnet_classify.h> + +typedef enum { + L2_CLASSIFY_NEXT_DROP, + L2_CLASSIFY_NEXT_ETHERNET_INPUT, + L2_CLASSIFY_NEXT_IP4_INPUT, + L2_CLASSIFY_NEXT_IP6_INPUT, + L2_CLASSIFY_NEXT_LI, + L2_CLASSIFY_N_NEXT, +} l2_classify_next_t; + +typedef enum { + L2_CLASSIFY_TABLE_IP4, + L2_CLASSIFY_TABLE_IP6, + L2_CLASSIFY_TABLE_OTHER, + L2_CLASSIFY_N_TABLES, +} l2_classify_table_id_t; + +typedef struct { + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + /* Per-address-family classifier table vectors */ + u32 * classify_table_index_by_sw_if_index [L2_CLASSIFY_N_TABLES]; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + vnet_classify_main_t * vnet_classify_main; +} l2_classify_main_t; + +l2_classify_main_t l2_classify_main; + +vlib_node_registration_t l2_classify_node; + +void vnet_l2_classify_enable_disable (u32 sw_if_index, + int enable_disable); + +int vnet_l2_classify_set_tables (u32 sw_if_index, u32 ip4_table_index, + u32 ip6_table_index, u32 other_table_index); + +#endif /* __included_vnet_l2_classify_h__ */ diff --git a/vnet/vnet/l2/l2_efp_filter.c b/vnet/vnet/l2/l2_efp_filter.c new file mode 100644 index 00000000000..a8bceca13fe --- /dev/null +++ b/vnet/vnet/l2/l2_efp_filter.c @@ -0,0 +1,572 @@ +/* + * l2_efp_filter.c : layer 2 egress EFP Filter processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_output.h> +#include <vnet/ethernet/ethernet.h> + +#include <vppinfra/error.h> +#include <vppinfra/cache.h> + +typedef struct { + + // Next nodes for features and output interfaces + l2_output_next_nodes_st next_nodes; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_efp_filter_main_t; + + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u8 raw[12]; // raw data (vlans) + u32 sw_if_index; +} l2_efp_filter_trace_t; + +/* packet trace format function */ +static u8 * format_l2_efp_filter_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_efp_filter_trace_t * t = va_arg (*args, l2_efp_filter_trace_t *); + + s = format (s, "l2-output-vtr: sw_if_index %d dst %U src %U data " + "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], t->raw[5], + t->raw[6], t->raw[7], t->raw[8], t->raw[9], t->raw[10], t->raw[11]); + return s; +} + +l2_efp_filter_main_t l2_efp_filter_main; + +static vlib_node_registration_t l2_efp_filter_node; + +#define foreach_l2_efp_filter_error \ +_(L2_EFP_FILTER, "L2 EFP filter packets") \ +_(DROP, "L2 EFP filter post-rewrite drops") + +typedef enum { +#define _(sym,str) L2_EFP_FILTER_ERROR_##sym, + foreach_l2_efp_filter_error +#undef _ + L2_EFP_FILTER_N_ERROR, +} l2_efp_filter_error_t; + +static char * l2_efp_filter_error_strings[] = { +#define _(sym,string) string, + foreach_l2_efp_filter_error +#undef _ +}; + +typedef enum { + L2_EFP_FILTER_NEXT_DROP, + L2_EFP_FILTER_N_NEXT, +} l2_efp_filter_next_t; + + +// Extract fields from the packet that will be used in interface classification +static_always_inline void +extract_keys (vnet_main_t * vnet_main, + u32 sw_if_index0, + vlib_buffer_t * b0, + u32 * port_sw_if_index0, + u16 * first_ethertype0, + u16 * outer_id0, + u16 * inner_id0, + u32 * match_flags0) +{ + ethernet_header_t * e0; + ethernet_vlan_header_t * h0; + u32 tag_len; + u32 tag_num; + + *port_sw_if_index0 = vnet_get_sup_sw_interface (vnet_main, sw_if_index0)->sw_if_index; + + e0 = vlib_buffer_get_current (b0); + h0 = (ethernet_vlan_header_t *)(e0+1); + + *first_ethertype0 = clib_net_to_host_u16(e0->type); + *outer_id0 = clib_net_to_host_u16 (h0[0].priority_cfi_and_id); + *inner_id0 = clib_net_to_host_u16 (h0[1].priority_cfi_and_id); + + tag_len = vnet_buffer(b0)->l2.l2_len - sizeof(ethernet_header_t); + tag_num = tag_len / sizeof(ethernet_vlan_header_t); + *match_flags0 = eth_create_valid_subint_match_flags (tag_num); +} + +/* + * EFP filtering is a basic switch feature which prevents an interface from + * transmitting a packet that doesn't match the interface's ingress match + * criteria. The check has two parts, one performed before egress vlan tag + * rewrite and one after. + * + * The pre-rewrite check insures the packet matches what an ingress packet looks + * like after going through the interface's ingress tag rewrite operation. Only + * pushed tags are compared. 
So: + * - if the ingress vlan tag rewrite pushes no tags (or is not enabled), + * any packet passes the filter + * - if the ingress vlan tag rewrite pushes one tag, + * the packet must have at least one tag, and the outer tag must match the pushed tag + * - if the ingress vlan tag rewrite pushes two tags, + * the packet must have at least two tags, and the outer two tags must match the pushed tags + * + * The pre-rewrite check is performed in the l2-output node. + * + * The post-rewrite check insures the packet matches what an ingress packet looks + * like before going through the interface's ingress tag rewrite operation. It verifies + * that such a packet arriving on the wire at this port would be classified as arriving + * an input interface equal to the packet's output interface. This can be done by running + * the output packet's vlan tags and output port through the interface classification, + * and checking if the resulting interface matches the output interface. + * + * The post-rewrite check is performed here. + */ + +static uword +l2_efp_filter_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_efp_filter_next_t next_index; + l2_efp_filter_main_t * msm = &l2_efp_filter_main; + vlib_node_t *n = vlib_get_node (vm, l2_efp_filter_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + u32 cached_sw_if_index = ~0; + u32 cached_next_index = ~0; + + /* invalidate cache to begin with */ + cached_sw_if_index = ~0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + u32 feature_bitmap0, feature_bitmap1; + u16 first_ethertype0, first_ethertype1; + u16 outer_id0, inner_id0, outer_id1, inner_id1; + u32 match_flags0, match_flags1; + u32 port_sw_if_index0, subint_sw_if_index0, port_sw_if_index1, subint_sw_if_index1; + vnet_hw_interface_t * hi0, * hi1; + main_intf_t * main_intf0, * main_intf1; + vlan_intf_t * vlan_intf0, * vlan_intf1; + qinq_intf_t * qinq_intf0, * qinq_intf1; + u32 is_l20, is_l21; + __attribute__((unused)) u32 matched0, matched1; + u8 error0, error1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3, * p4, * p5; + __attribute__((unused)) u32 sw_if_index2, sw_if_index3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header and packet for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + + // Prefetch the input config for the N+1 loop iteration + // This depends on the buffer header above + sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_TX]; + sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_TX]; + //TODO CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD); + //TODO CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* TX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; + + // process 2 packets + em->counters[node_counter_base_index + L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 2; + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER; + feature_bitmap1 = vnet_buffer(b1)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER; + + // Determine next node + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2_efp_filter_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b0, + sw_if_index0, + feature_bitmap0, + &next0); + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2_efp_filter_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b1, + sw_if_index1, + feature_bitmap1, + &next1); + + // perform the efp filter check on two packets + + extract_keys (msm->vnet_main, + sw_if_index0, + b0, + &port_sw_if_index0, + &first_ethertype0, + &outer_id0, + &inner_id0, + &match_flags0); + + extract_keys (msm->vnet_main, + sw_if_index1, + b1, + &port_sw_if_index1, + &first_ethertype1, + &outer_id1, + &inner_id1, + &match_flags1); + + eth_vlan_table_lookups (ðernet_main, + msm->vnet_main, + port_sw_if_index0, + first_ethertype0, + outer_id0, + inner_id0, + &hi0, + &main_intf0, + &vlan_intf0, + &qinq_intf0); + + eth_vlan_table_lookups (ðernet_main, + msm->vnet_main, + port_sw_if_index1, + first_ethertype1, + outer_id1, + inner_id1, + &hi1, + &main_intf1, + &vlan_intf1, + &qinq_intf1); + + matched0 = eth_identify_subint (hi0, + b0, + match_flags0, + main_intf0, + vlan_intf0, + qinq_intf0, + &subint_sw_if_index0, + &error0, + &is_l20); + + matched1 = eth_identify_subint (hi1, + b1, + match_flags1, + main_intf1, + vlan_intf1, + qinq_intf1, + &subint_sw_if_index1, + &error1, + &is_l21); + + if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0)) { + // Drop packet + next0 = L2_EFP_FILTER_NEXT_DROP; + b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP]; + } + + if (PREDICT_FALSE (sw_if_index1 != subint_sw_if_index1)) { + // Drop packet + next1 = L2_EFP_FILTER_NEXT_DROP; + b1->error = node->errors[L2_EFP_FILTER_ERROR_DROP]; 
+ } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) { + if (b0->flags & VLIB_BUFFER_IS_TRACED) { + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + l2_efp_filter_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + memcpy(t->raw, &h0->type, sizeof(t->raw)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) { + ethernet_header_t * h1 = vlib_buffer_get_current (b1); + l2_efp_filter_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + memcpy(t->raw, &h1->type, sizeof(t->raw)); + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + u32 feature_bitmap0; + u16 first_ethertype0; + u16 outer_id0, inner_id0; + u32 match_flags0; + u32 port_sw_if_index0, subint_sw_if_index0; + vnet_hw_interface_t * hi0; + main_intf_t * main_intf0; + vlan_intf_t * vlan_intf0; + qinq_intf_t * qinq_intf0; + u32 is_l20; + __attribute__((unused)) u32 matched0; + u8 error0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + + // process 1 packet + em->counters[node_counter_base_index + L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 1; + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER; + + // Determine next node + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2_efp_filter_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b0, + sw_if_index0, + feature_bitmap0, + &next0); + + // perform the efp filter check on one packet + + extract_keys (msm->vnet_main, + sw_if_index0, + b0, + &port_sw_if_index0, + &first_ethertype0, + &outer_id0, + &inner_id0, + &match_flags0); + + eth_vlan_table_lookups (ðernet_main, + msm->vnet_main, + port_sw_if_index0, + first_ethertype0, + outer_id0, + inner_id0, + &hi0, + &main_intf0, + &vlan_intf0, + &qinq_intf0); + + matched0 = eth_identify_subint (hi0, + b0, + match_flags0, + main_intf0, + vlan_intf0, + qinq_intf0, + &subint_sw_if_index0, + &error0, + &is_l20); + + if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0)) { + // Drop packet + next0 = L2_EFP_FILTER_NEXT_DROP; + b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP]; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + l2_efp_filter_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + memcpy(t->raw, &h0->type, sizeof(t->raw)); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + 
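[Editorial note, not part of the change set] Stripped of the dual-loop bookkeeping, the post-rewrite EFP check performed by the node above reduces to the per-packet steps below. This is a condensed restatement for illustration only; all helper names are taken from the code above, and the wrapper name is hypothetical:

/* Sketch: a packet passes the post-rewrite EFP check only if ingress
 * classification of its (rewritten) tags on the output port would pick
 * the same sub-interface it is being transmitted on. */
static_always_inline int
example_efp_filter_check (vnet_main_t * vnm, vlib_buffer_t * b0,
                          u32 tx_sw_if_index)
{
  u32 port_sw_if_index, subint_sw_if_index, match_flags, is_l2;
  u16 first_ethertype, outer_id, inner_id;
  vnet_hw_interface_t * hi;
  main_intf_t * main_intf;
  vlan_intf_t * vlan_intf;
  qinq_intf_t * qinq_intf;
  u8 error;

  /* classification keys: port, ethertype and up to two vlan tags */
  extract_keys (vnm, tx_sw_if_index, b0, &port_sw_if_index,
                &first_ethertype, &outer_id, &inner_id, &match_flags);

  /* look up the vlan tables for the output port */
  eth_vlan_table_lookups (&ethernet_main, vnm, port_sw_if_index,
                          first_ethertype, outer_id, inner_id,
                          &hi, &main_intf, &vlan_intf, &qinq_intf);

  /* run interface classification and compare against the TX interface */
  eth_identify_subint (hi, b0, match_flags, main_intf, vlan_intf,
                       qinq_intf, &subint_sw_if_index, &error, &is_l2);

  return subint_sw_if_index == tx_sw_if_index;   /* 1 = pass, 0 = drop */
}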
+VLIB_REGISTER_NODE (l2_efp_filter_node,static) = { + .function = l2_efp_filter_node_fn, + .name = "l2-efp-filter", + .vector_size = sizeof (u32), + .format_trace = format_l2_efp_filter_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_efp_filter_error_strings), + .error_strings = l2_efp_filter_error_strings, + + .n_next_nodes = L2_EFP_FILTER_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_EFP_FILTER_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2_efp_filter_init (vlib_main_t *vm) +{ + l2_efp_filter_main_t * mp = &l2_efp_filter_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_efp_filter_node.index, + L2OUTPUT_N_FEAT, + l2output_get_feat_names(), + mp->next_nodes.feat_next_node_index); + + // Initialize the output node mapping table + l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_efp_filter_init); + + +// Enable/disable the EFP Filter check on the subinterface +void l2_efp_filter_configure (vnet_main_t * vnet_main, + u32 sw_if_index, + u32 enable) +{ + // set the interface flag + l2output_intf_bitmap_enable(sw_if_index, L2OUTPUT_FEAT_EFP_FILTER, enable); +} + + +// set subinterface egress efp filter enable/disable +// The CLI format is: +// set interface l2 efp-filter <interface> [disable]] +static clib_error_t * +int_l2_efp_filter (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // enable/disable the feature + l2_efp_filter_configure (vnm, sw_if_index, enable); + + done: + return error; +} + + +VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = { + .path = "set interface l2 efp-filter", + .short_help = "set interface l2 efp-filter <interface> [disable]", + .function = int_l2_efp_filter, +}; + diff --git a/vnet/vnet/l2/l2_efp_filter.h b/vnet/vnet/l2/l2_efp_filter.h new file mode 100644 index 00000000000..f8baf092fa8 --- /dev/null +++ b/vnet/vnet/l2/l2_efp_filter.h @@ -0,0 +1,28 @@ +/* + * l2_efp_filter.h : layer 2 egress EFP Filter processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef included_vnet_l2_efp_filter_h +#define included_vnet_l2_efp_filter_h + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + + +#endif + diff --git a/vnet/vnet/l2/l2_fib.c b/vnet/vnet/l2/l2_fib.c new file mode 100644 index 00000000000..198ffd281bb --- /dev/null +++ b/vnet/vnet/l2/l2_fib.c @@ -0,0 +1,567 @@ +/* + * l2_fib.c : layer 2 forwarding table (aka mac table) + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
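[Editorial note, not part of the change set] The CLI handler above simply forwards to l2_efp_filter_configure(); other code can enable or disable the feature the same way. A short usage sketch, assuming a valid sw_if_index:

/* Sketch: toggle the egress EFP filter on a sub-interface from code,
 * mirroring what the "set interface l2 efp-filter" handler does. */
vnet_main_t * vnm = vnet_get_main ();

l2_efp_filter_configure (vnm, sw_if_index, 1 /* enable */);
/* ... */
l2_efp_filter_configure (vnm, sw_if_index, 0 /* disable */);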
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_learn.h> +#include <vnet/l2/l2_bd.h> + +#include <vppinfra/bihash_template.c> + +typedef struct { + + /* hash table */ + BVT(clib_bihash) mac_table; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2fib_main_t; + +l2fib_main_t l2fib_main; + + +// Format sw_if_index. If the value is ~0, use the text "N/A" +u8 * format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + u32 sw_if_index = va_arg (*args, u32); + if (sw_if_index == ~0) + return format (s, "N/A"); + else + return format (s, "%U", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, sw_if_index)); +} + +void l2fib_table_dump (u32 bd_index, l2fib_entry_key_t **l2fe_key, + l2fib_entry_result_t **l2fe_res) +{ + l2fib_main_t * msm = &l2fib_main; + BVT(clib_bihash) * h = &msm->mac_table; + clib_bihash_bucket_t * b; + BVT(clib_bihash_value) * v; + l2fib_entry_key_t key; + l2fib_entry_result_t result; + int i, j, k; + + for (i = 0; i < h->nbuckets; i++) + { + b = &h->buckets[i]; + if (b->offset == 0) + continue; + v = BV(clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) + continue; + + key.raw = v->kvp[k].key; + result.raw = v->kvp[k].value; + + if ((bd_index == ~0) || (bd_index == key.fields.bd_index)) + { + vec_add1 (*l2fe_key, key); + vec_add1 (*l2fe_res, result); + } + } + v++; + } + } +} + +// Display the contents of the l2fib +static clib_error_t * +show_l2fib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + l2fib_main_t * msm = &l2fib_main; + BVT(clib_bihash) * h = &msm->mac_table; + clib_bihash_bucket_t * b; + BVT(clib_bihash_value) * v; + l2fib_entry_key_t key; + l2fib_entry_result_t result; + u32 first_entry = 1; + u64 total_entries = 0; + int i, j, k; + u8 verbose = 0; + u8 raw = 0; + u32 bd_id, bd_index = ~0; + + if (unformat (input, "raw")) + raw = 1; + else if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "bd_index %d", &bd_index)) + verbose = 1; + else if (unformat (input, "bd_id %d", &bd_id)) + { + uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) + { + verbose = 1; + bd_index = p[0]; + } + else + { + vlib_cli_output (vm, "no such bridge domain id"); + return 0; + } + } + + for (i = 0; i < h->nbuckets; i++) + { + b = &h->buckets[i]; + if (b->offset == 0) + continue; + v = BV(clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) + 
continue; + + if (verbose && first_entry) + { + first_entry=0; + vlib_cli_output (vm, + "%=19s%=7s%=30s%=7s%=8s%=8s%=5s%=9s%=11s", + "Mac Address", "BD Idx", "Interface", + "Index", "static", "filter", "bvi", + "refresh", "timestamp"); + } + + key.raw = v->kvp[k].key; + result.raw = v->kvp[k].value; + + if (verbose + & ((bd_index >>31) || (bd_index == key.fields.bd_index))) + { + vlib_cli_output (vm, + "%=19U%=7d%=30U%=7d%=8d%=8d%=5d%=9d%=11X", + format_ethernet_address, key.fields.mac, + key.fields.bd_index, + format_vnet_sw_if_index_name_with_NA, + msm->vnet_main, result.fields.sw_if_index, + result.fields.sw_if_index == ~0 + ? -1 : result.fields.sw_if_index, + result.fields.static_mac, + result.fields.filter, + result.fields.bvi, + result.fields.refresh, + result.fields.timestamp); + } + total_entries++; + } + v++; + } + } + + if (total_entries == 0) + vlib_cli_output (vm, "no l2fib entries"); + else + vlib_cli_output (vm, "%lld l2fib entries", total_entries); + + if (raw) + vlib_cli_output (vm, "Raw Hash Table:\n%U\n", + BV(format_bihash), h, 1 /* verbose */); + + return 0; +} + +VLIB_CLI_COMMAND (show_l2fib_cli, static) = { + .path = "show l2fib", + .short_help = "show l2fib [verbose | bd_id <nn> | bd_index <nn> | raw]", + .function = show_l2fib, +}; + + +// Remove all entries from the l2fib +void l2fib_clear_table (uint keep_static) +{ + l2fib_main_t * mp = &l2fib_main; + + if (keep_static) { + // TODO: remove only non-static entries + } else { + // Remove all entries + BV(clib_bihash_free) (&mp->mac_table); + BV(clib_bihash_init) (&mp->mac_table, "l2fib mac table", + L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); + } + + l2learn_main.global_learn_count = 0; +} + +// Clear all entries in L2FIB +// TODO: Later we may want a way to remove only the non-static entries +static clib_error_t * +clear_l2fib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2fib_clear_table (0); + return 0; +} + +VLIB_CLI_COMMAND (clear_l2fib_cli, static) = { + .path = "clear l2fib", + .short_help = "Clear l2fib mac forwarding entries", + .function = clear_l2fib, +}; + + +// Add an entry to the l2fib. +// If the entry already exists then overwrite it +void l2fib_add_entry (u64 mac, + u32 bd_index, + u32 sw_if_index, + u32 static_mac, + u32 filter_mac, + u32 bvi_mac) { + l2fib_entry_key_t key; + l2fib_entry_result_t result; + __attribute__((unused)) u32 bucket_contents; + l2fib_main_t * mp = &l2fib_main; + BVT(clib_bihash_kv) kv; + + // set up key + key.raw = l2fib_make_key ((u8 *)&mac, bd_index); + + // set up result + result.raw = 0; // clear all fields + result.fields.sw_if_index = sw_if_index; + result.fields.static_mac = static_mac; + result.fields.filter = filter_mac; + result.fields.bvi = bvi_mac; + + kv.key = key.raw; + kv.value = result.raw; + + BV(clib_bihash_add_del) (&mp->mac_table, &kv, 1 /* is_add */); + + // increment counter if dynamically learned mac + if (result.fields.static_mac) { + l2learn_main.global_learn_count++; + } +} + +// Add an entry to the L2FIB +// The CLI format is: +// l2fib add <mac> <bd> <intf> [static] [bvi] +// l2fib add <mac> <bd> filter +// Note that filter and bvi entries are always static +static clib_error_t * +l2fib_add (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u64 mac; + u32 bd_id; + u32 bd_index; + u32 sw_if_index = ~0; + u32 filter_mac = 0; + u32 static_mac = 0; + u32 bvi_mac = 0; + uword * p; + + if (! 
unformat_user (input, unformat_ethernet_address, &mac)) + { + error = clib_error_return (0, "expected mac address `%U'", + format_unformat_error, input); + goto done; + } + + if (!unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expected bridge domain ID `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (!p) { + error = clib_error_return (0, "bridge domain ID %d invalid", bd_id); + goto done; + } + bd_index = p[0]; + + if (unformat (input, "filter")) { + filter_mac = 1; + static_mac = 1; + + } else { + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + if (unformat (input, "static")) { + static_mac = 1; + } else if (unformat (input, "bvi")) { + bvi_mac = 1; + static_mac = 1; + } + } + + l2fib_add_entry(mac, bd_index, sw_if_index, static_mac, filter_mac, bvi_mac); + + done: + return error; +} + +VLIB_CLI_COMMAND (l2fib_add_cli, static) = { + .path = "l2fib add", + .short_help = "Add l2fib mac forwarding entry <mac> <bd-id> filter | <intf> [static | bvi]", + .function = l2fib_add, +}; + + +static clib_error_t * +l2fib_test_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + u64 mac, save_mac; + u32 bd_index = 0; + u32 sw_if_index = 8; + u32 filter_mac = 0; + u32 bvi_mac = 0; + u32 is_add = 0; + u32 is_del = 0; + u32 is_check = 0; + u32 count = 1; + int mac_set = 0; + int i; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mac %U", unformat_ethernet_address, &mac)) + mac_set = 1; + else if (unformat (input, "add")) + is_add = 1; + else if (unformat (input, "del")) + is_del = 1; + else if (unformat (input, "check")) + is_check = 1; + else if (unformat (input, "count %d", &count)) + ; + else + break; + } + + if (mac_set == 0) + return clib_error_return (0, "mac not set"); + + if (is_add == 0 && is_del == 0 && is_check == 0) + return clib_error_return (0, "noop: pick at least one of (add,del,check)"); + + save_mac = mac; + + if (is_add) + { + for (i = 0; i < count; i++) + { + u64 tmp; + l2fib_add_entry(mac, bd_index, sw_if_index, mac, + filter_mac, bvi_mac); + tmp = clib_net_to_host_u64(mac); + tmp >>= 16; + tmp++; + tmp <<= 16; + mac = clib_host_to_net_u64 (tmp); + } + } + + if (is_check) + { + BVT(clib_bihash_kv) kv; + l2fib_main_t * mp = &l2fib_main; + + mac = save_mac; + + for (i = 0; i < count; i++) + { + u64 tmp; + kv.key = l2fib_make_key ((u8 *)&mac, bd_index); + if (BV(clib_bihash_search) (&mp->mac_table, &kv, &kv)) + { + clib_warning ("key %U AWOL", format_ethernet_address, &mac); + break; + } + tmp = clib_net_to_host_u64(mac); + tmp >>= 16; + tmp++; + tmp <<= 16; + mac = clib_host_to_net_u64 (tmp); + } + } + + if (is_del) + { + for (i = 0; i < count; i++) + { + u64 tmp; + + l2fib_del_entry (mac, bd_index); + + tmp = clib_net_to_host_u64(mac); + tmp >>= 16; + tmp++; + tmp <<= 16; + mac = clib_host_to_net_u64 (tmp); + } + } + + return error; +} + +VLIB_CLI_COMMAND (l2fib_test_command, static) = { + .path = "test l2fib", + .short_help = "test l2fib [del] mac <base-addr> count <nn>", + .function = l2fib_test_command_fn, +}; + + +// Delete an entry from the l2fib. 
+// Return 0 if the entry was deleted, or 1 if it was not found +u32 l2fib_del_entry (u64 mac, + u32 bd_index) { + + l2fib_entry_result_t result; + l2fib_main_t * mp = &l2fib_main; + BVT(clib_bihash_kv) kv; + + // set up key + kv.key = l2fib_make_key ((u8 *)&mac, bd_index); + + if (BV(clib_bihash_search) (&mp->mac_table, &kv, &kv)) + return 1; + + result.raw = kv.value; + + // decrement counter if dynamically learned mac + if (result.fields.static_mac) { + if (l2learn_main.global_learn_count > 0) { + l2learn_main.global_learn_count--; + } + } + + // Remove entry from hash table + BV(clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */); + return 0; +} + +// Delete an entry from the L2FIB +// The CLI format is: +// l2fib del <mac> <bd-id> +static clib_error_t * +l2fib_del (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u64 mac; + u32 bd_id; + u32 bd_index; + uword * p; + + if (! unformat_user (input, unformat_ethernet_address, &mac)) + { + error = clib_error_return (0, "expected mac address `%U'", + format_unformat_error, input); + goto done; + } + + if (!unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expected bridge domain ID `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (!p) { + error = clib_error_return (0, "bridge domain ID %d invalid", bd_id); + goto done; + } + bd_index = p[0]; + + // Delete the entry + if (l2fib_del_entry(mac, bd_index)) { + error = clib_error_return (0, "mac entry not found"); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (l2fib_del_cli, static) = { + .path = "l2fib del", + .short_help = "Delete l2fib mac forwarding entry <mac> <bd-id>", + .function = l2fib_del, +}; + + +BVT(clib_bihash) *get_mac_table(void) { + l2fib_main_t * mp = &l2fib_main; + return &mp->mac_table; +} + +clib_error_t *l2fib_init (vlib_main_t *vm) +{ + l2fib_main_t * mp = &l2fib_main; + l2fib_entry_key_t test_key; + u8 test_mac[6]; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Create the hash table + BV(clib_bihash_init) (&mp->mac_table, "l2fib mac table", + L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); + + // verify the key constructor is good, since it is endian-sensitive + test_mac[0] = 0x11; + test_key.raw = 0; + test_key.raw = l2fib_make_key ((u8 *)&test_mac, 0x1234); + ASSERT (test_key.fields.mac[0] == 0x11); + ASSERT (test_key.fields.bd_index == 0x1234); + + return 0; +} + +VLIB_INIT_FUNCTION (l2fib_init); + diff --git a/vnet/vnet/l2/l2_fib.h b/vnet/vnet/l2/l2_fib.h new file mode 100644 index 00000000000..1dcc0200f60 --- /dev/null +++ b/vnet/vnet/l2/l2_fib.h @@ -0,0 +1,226 @@ +/* + * l2_fib.h : layer 2 forwarding table (aka mac table) + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
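[Editorial note, not part of the change set] The add and delete CLI handlers above are wrappers over l2fib_add_entry() and l2fib_del_entry(); programmatic callers use the same pair. A minimal sketch, assuming the MAC is already packed into a u64 as in those handlers:

/* Sketch: install a static MAC entry and later remove it.
 * l2fib_add_entry() overwrites any existing entry for (mac, bd_index);
 * l2fib_del_entry() returns non-zero if the entry was not found. */
static void
example_static_mac (u64 mac, u32 bd_index, u32 sw_if_index)
{
  l2fib_add_entry (mac, bd_index, sw_if_index,
                   1 /* static_mac */, 0 /* filter_mac */, 0 /* bvi_mac */);

  /* ... later ... */
  if (l2fib_del_entry (mac, bd_index))
    clib_warning ("mac entry not found");
}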
+ */ + +#ifndef included_l2fib_h +#define included_l2fib_h + +#include <vlib/vlib.h> +#include <vppinfra/bihash_8_8.h> + +/* + * The size of the hash table + */ +#define L2FIB_NUM_BUCKETS (64 * 1024) +#define L2FIB_MEMORY_SIZE (256<<20) + +/* + * The L2fib key is the mac address and bridge domain ID + */ +typedef struct { + union { + struct { + u16 bd_index; + u8 mac[6]; + } fields; + struct { + u32 w0; + u32 w1; + } words; + u64 raw; + }; +} l2fib_entry_key_t; + +/* + * The l2fib entry results + */ +typedef struct { + union { + struct { + u32 sw_if_index; // output sw_if_index (L3 interface if bvi==1) + + u8 static_mac:1; // static mac, no dataplane learning + u8 bvi:1; // mac is for a bridged virtual interface + u8 filter:1; // drop packets to/from this mac + u8 refresh:1; // refresh flag for aging + u8 unused1:4; + u8 timestamp; // timestamp for aging + u16 unused2; + } fields; + u64 raw; + }; +} l2fib_entry_result_t; + + +// Compute the hash for the given key and return the corresponding bucket index +always_inline +u32 l2fib_compute_hash_bucket (l2fib_entry_key_t *key) { + u32 result; + u32 temp_a; + u32 temp_b; + + result = 0xa5a5a5a5; // some seed + temp_a = key->words.w0; + temp_b = key->words.w1; + hash_mix32(temp_a, temp_b, result); + + return result % L2FIB_NUM_BUCKETS; +} + +always_inline +u64 l2fib_make_key (u8 * mac_address, u16 bd_index) { + u64 temp; + + // The mac address in memory is A:B:C:D:E:F + // The bd id in register is H:L +#if CLIB_ARCH_IS_LITTLE_ENDIAN + // Create the in-register key as F:E:D:C:B:A:H:L + // In memory the key is L:H:A:B:C:D:E:F + temp = *((u64 *)(mac_address - 2)); + temp = (temp & ~0xffff) | (u64)(bd_index); +#else + // Create the in-register key as H:L:A:B:C:D:E:F + // In memory the key is H:L:A:B:C:D:E:F + temp = *((u64 *)(mac_address)) >> 16; + temp = temp | (bd_index << 48); +#endif + + return temp; +} + + + +// Lookup the entry for mac and bd_index in the mac table for 1 packet. +// Cached_key and cached_result are used as a one-entry cache. +// The function reads and updates them as needed. +// +// mac0 and bd_index0 are the keys. The entry is written to result0. +// If the entry was not found, result0 is set to ~0. +// +// key0 and bucket0 return with the computed key and hash bucket, +// convenient if the entry needs to be updated afterward. +// If the cached_result was used, bucket0 is set to ~0. + +static_always_inline void +l2fib_lookup_1 (BVT(clib_bihash) * mac_table, + l2fib_entry_key_t * cached_key, + l2fib_entry_result_t * cached_result, + u8 * mac0, + u16 bd_index0, + l2fib_entry_key_t * key0, + u32 * bucket0, + l2fib_entry_result_t *result0) +{ + // set up key + key0->raw = l2fib_make_key (mac0, bd_index0); + *bucket0 = ~0; + + if (key0->raw == cached_key->raw) { + // Hit in the one-entry cache + result0->raw = cached_result->raw; + } else { + // Do a regular mac table lookup + BVT(clib_bihash_kv) kv; + + kv.key = key0->raw; + kv.value = ~0ULL; + BV(clib_bihash_search_inline) (mac_table, &kv); + result0->raw = kv.value; + + // Update one-entry cache + cached_key->raw = key0->raw; + cached_result->raw = result0->raw; + } +} + + +// Lookup the entry for mac and bd_index in the mac table for 2 packets. +// The lookups for the two packets are interleaved. +// +// Cached_key and cached_result are used as a one-entry cache. +// The function reads and updates them as needed. +// +// mac0 and bd_index0 are the keys. The entry is written to result0. +// If the entry was not found, result0 is set to ~0. 
The same +// holds for mac1/bd_index1/result1. + +static_always_inline void +l2fib_lookup_2 (BVT(clib_bihash) * mac_table, + l2fib_entry_key_t * cached_key, + l2fib_entry_result_t * cached_result, + u8 * mac0, + u8 * mac1, + u16 bd_index0, + u16 bd_index1, + l2fib_entry_key_t * key0, + l2fib_entry_key_t * key1, + u32 * bucket0, + u32 * bucket1, + l2fib_entry_result_t *result0, + l2fib_entry_result_t *result1) +{ + // set up key + key0->raw = l2fib_make_key (mac0, bd_index0); + key1->raw = l2fib_make_key (mac1, bd_index1); + + if ((key0->raw == cached_key->raw) && + (key1->raw == cached_key->raw)) { + // Both hit in the one-entry cache + result0->raw = cached_result->raw; + result1->raw = cached_result->raw; + *bucket0 = ~0; + *bucket1 = ~0; + + } else { + BVT(clib_bihash_kv) kv0, kv1; + + // Do a regular mac table lookup + // Interleave lookups for packet 0 and packet 1 + kv0.key = key0->raw; + kv1.key = key1->raw; + kv0.value = ~0ULL; + kv1.value = ~0ULL; + + BV(clib_bihash_search_inline) (mac_table, &kv0); + BV(clib_bihash_search_inline) (mac_table, &kv1); + + result0->raw = kv0.value; + result1->raw = kv1.value; + + // Update one-entry cache + cached_key->raw = key1->raw; + cached_result->raw = result1->raw; + } +} + + +BVT(clib_bihash) *get_mac_table(void); +void l2fib_clear_table (uint keep_static); +void l2fib_add_entry (u64 mac, + u32 bd_index, + u32 sw_if_index, + u32 static_mac, + u32 drop_mac, + u32 bvi_mac); +u32 l2fib_del_entry (u64 mac, + u32 bd_index); + +void l2fib_table_dump (u32 bd_index, l2fib_entry_key_t **l2fe_key, + l2fib_entry_result_t **l2fe_res); + +u8 * format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args); + +#endif diff --git a/vnet/vnet/l2/l2_flood.c b/vnet/vnet/l2/l2_flood.c new file mode 100644 index 00000000000..8a702168715 --- /dev/null +++ b/vnet/vnet/l2/l2_flood.c @@ -0,0 +1,520 @@ +/* + * l2_flood.c : layer 2 flooding + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_bvi.h> +#include <vnet/replication.h> +#include <vnet/l2/l2_fib.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> + + +/* + * Flooding uses the packet replication infrastructure to send a copy of the + * packet to each member interface. Logically the replication infrastructure + * expects two graph nodes: a prep node that initiates replication and sends the + * packet to the first destination, and a recycle node that is passed the packet + * after it has been transmitted. + * + * To decrease the amount of code, l2 flooding implements both functions in + * the same graph node. This node can tell if is it being called as the "prep" + * or "recycle" using replication_is_recycled(). 
+ */ + + +typedef struct { + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + // next node index for the L3 input node of each ethertype + next_by_ethertype_t l3_next; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2flood_main_t; + +typedef struct { + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u16 bd_index; +} l2flood_trace_t; + + +/* packet trace format function */ +static u8 * format_l2flood_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2flood_trace_t * t = va_arg (*args, l2flood_trace_t *); + + s = format (s, "l2-flood: sw_if_index %d dst %U src %U bd_index %d", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->bd_index); + return s; +} + +l2flood_main_t l2flood_main; + +static vlib_node_registration_t l2flood_node; + +#define foreach_l2flood_error \ +_(L2FLOOD, "L2 flood packets") \ +_(REPL_FAIL, "L2 replication failures") \ +_(NO_MEMBERS, "L2 replication complete") \ +_(BVI_TAGGED, "BVI packet with vlan tag") \ +_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") + +typedef enum { +#define _(sym,str) L2FLOOD_ERROR_##sym, + foreach_l2flood_error +#undef _ + L2FLOOD_N_ERROR, +} l2flood_error_t; + +static char * l2flood_error_strings[] = { +#define _(sym,string) string, + foreach_l2flood_error +#undef _ +}; + +typedef enum { + L2FLOOD_NEXT_L2_OUTPUT, + L2FLOOD_NEXT_DROP, + L2FLOOD_N_NEXT, +} l2flood_next_t; + +/* + * Perform flooding on one packet + * + * Due to the way BVI processing can modify the packet, the BVI interface + * (if present) must be processed last in the replication. The member vector + * is arranged so that the BVI interface is always the first element. + * Flooding walks the vector in reverse. + * + * BVI processing causes the packet to go to L3 processing. This strips the + * L2 header, which is fine because the replication infrastructure restores + * it. However L3 processing can trigger larger changes to the packet. For + * example, an ARP request could be turned into an ARP reply, an ICMP request + * could be turned into an ICMP reply. If BVI processing is not performed + * last, the modified packet would be replicated to the remaining members. 
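+ *
+ * Editor's note -- an illustrative walk, with hypothetical interface names:
+ * suppose the flood vector of a bridge domain is [BVI, ifA, ifB, ifC], with
+ * the BVI always stored at index 0, and a packet arrives on ifB. The "prep"
+ * pass starts at the last element and sends the first copy to ifC. Each
+ * "recycle" pass resumes one element lower, skipping any member that fails
+ * the reflection check (ifB itself) or that shares the packet's split-horizon
+ * group. The BVI at index 0 is therefore processed last, after every other
+ * member has already received its copy.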
+ */ + +static_always_inline void +l2flood_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + l2flood_main_t * msm, + u64 * counter_base, + vlib_buffer_t * b0, + u32 sw_if_index0, + l2fib_entry_key_t * key0, + u32 * bucket0, + l2fib_entry_result_t * result0, + u32 * next0) +{ + u16 bd_index0; + l2_bridge_domain_t *bd_config; + l2_flood_member_t * members; + i32 current_member; // signed + replication_context_t * ctx; + u8 in_shg = vnet_buffer(b0)->l2.shg; + + if (!replication_is_recycled(b0)) { + + // Do flood "prep node" processing + + // Get config for the bridge domain interface + bd_index0 = vnet_buffer(b0)->l2.bd_index; + bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index0); + members = bd_config->members; + + // Find first member that passes the reflection and SHG checks + current_member = vec_len(members) - 1; + while ((current_member >= 0) && + ((members[current_member].sw_if_index == sw_if_index0) || + (in_shg && members[current_member].shg == in_shg))) { + current_member--; + } + + if (current_member < 0) { + // No members to flood to + *next0 = L2FLOOD_NEXT_DROP; + b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; + return; + } + + if ((current_member > 0) && + ((current_member > 1) || + ((members[0].sw_if_index != sw_if_index0) && + (!in_shg || members[0].shg != in_shg)))) { + // If more than one member then initiate replication + ctx = replication_prep (vm, b0, l2flood_node.index, 1 /* l2_packet */); + ctx->feature_replicas = (u64) members; + ctx->feature_counter = current_member; + } + + } else { + + // Do flood "recycle node" processing + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) + { + (void)replication_recycle (vm, b0, 1 /* is_last */); + *next0 = L2FLOOD_NEXT_DROP; + b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL]; + return; + } + + ctx = replication_get_ctx (b0); + replication_clear_recycled (b0); + + members = (l2_flood_member_t *) ctx->feature_replicas; + current_member = (i32)ctx->feature_counter - 1; + + // Find next member that passes the reflection and SHG check + while ((current_member >= 0) && + ((members[current_member].sw_if_index == sw_if_index0) || + (in_shg && members[current_member].shg == in_shg))) { + current_member--; + } + + if (current_member < 0) { + // No more members to flood to. + // Terminate replication and drop packet. 
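+      // (Editor's note) Passing is_last = 1 below marks this as the final
+      // pass for this buffer, so the saved replication context can be
+      // released before the packet is dropped.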
+ + replication_recycle (vm, b0, 1 /* is_last */); + + *next0 = L2FLOOD_NEXT_DROP; + // Ideally we woudn't bump a counter here, just silently complete + b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; + return; + } + + // Restore packet and context and continue replication + ctx->feature_counter = current_member; + replication_recycle (vm, b0, + ((current_member == 0) || /*is_last */ + ((current_member == 1) && + ((members[0].sw_if_index == sw_if_index0) || + (in_shg && members[0].shg == in_shg))))); + } + + // Forward packet to the current member + + if (PREDICT_TRUE(members[current_member].flags == L2_FLOOD_MEMBER_NORMAL)) { + // Do normal L2 forwarding + vnet_buffer(b0)->sw_if_index[VLIB_TX] = members[current_member].sw_if_index; + *next0 = L2FLOOD_NEXT_L2_OUTPUT; + + } else { + // Do BVI processing + u32 rc; + rc = l2_to_bvi (vm, + msm->vnet_main, + b0, + members[current_member].sw_if_index, + &msm->l3_next, + next0); + + if (PREDICT_FALSE(rc)) { + if (rc == TO_BVI_ERR_TAGGED) { + b0->error = node->errors[L2FLOOD_ERROR_BVI_TAGGED]; + *next0 = L2FLOOD_NEXT_DROP; + } else if (rc == TO_BVI_ERR_ETHERTYPE) { + b0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE]; + *next0 = L2FLOOD_NEXT_DROP; + } + } + } + +} + + +static uword +l2flood_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2flood_next_t next_index; + l2flood_main_t * msm = &l2flood_main; + vlib_node_t *n = vlib_get_node (vm, l2flood_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t * h0, * h1; + l2fib_entry_key_t key0, key1; + l2fib_entry_result_t result0, result1; + u32 bucket0, bucket1; + + /* Prefetch next iteration. 
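+             (Editor's note) The enclosing dual-packet loop requires
+             n_left_from >= 6 so that from[2]..from[5] are valid here:
+             buffer headers are prefetched two iterations ahead (p4, p5),
+             while replication contexts and packet data are prefetched one
+             iteration ahead (p2, p3).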
*/ + { + vlib_buffer_t * p2, * p3, * p4, * p5; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + // Prefetch the replication context for the N+1 loop iteration + // This depends on the buffer header above + replication_prefetch_ctx (p2); + replication_prefetch_ctx (p3); + + // Prefetch the packet for the N+1 loop iteration + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* Process 2 x pkts */ + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + /* process 2 pkts */ + em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 2; + + l2flood_process (vm, node, msm, &em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &bucket0, &result0, &next0); + + l2flood_process (vm, node, msm, &em->counters[node_counter_base_index], + b1, sw_if_index1, &key1, &bucket1, &result1, &next1); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + l2flood_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->bd_index = vnet_buffer(b1)->l2.bd_index; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t * h0; + l2fib_entry_key_t key0; + l2fib_entry_result_t result0; + u32 bucket0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + + /* process 1 pkt */ + em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 1; + + l2flood_process (vm, node, msm, &em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &bucket0, &result0, &next0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, 
h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2flood_node,static) = { + .function = l2flood_node_fn, + .name = "l2-flood", + .vector_size = sizeof (u32), + .format_trace = format_l2flood_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2flood_error_strings), + .error_strings = l2flood_error_strings, + + .n_next_nodes = L2FLOOD_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2FLOOD_NEXT_L2_OUTPUT] = "l2-output", + [L2FLOOD_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2flood_init (vlib_main_t *vm) +{ + l2flood_main_t * mp = &l2flood_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2flood_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2flood_init); + + + +// Add the L3 input node for this ethertype to the next nodes structure +void +l2flood_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index) +{ + l2flood_main_t * mp = &l2flood_main; + u32 next_index; + + next_index = vlib_node_add_next (vm, + l2flood_node.index, + node_index); + + next_by_ethertype_register (&mp->l3_next, type, next_index); +} + + +// set subinterface flood enable/disable +// The CLI format is: +// set interface l2 flood <interface> [disable] +static clib_error_t * +int_flood (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_FLOOD, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (int_flood_cli, static) = { + .path = "set interface l2 flood", + .short_help = "set interface l2 flood <interface> [disable]", + .function = int_flood, +}; diff --git a/vnet/vnet/l2/l2_flood.h b/vnet/vnet/l2/l2_flood.h new file mode 100644 index 00000000000..3c9273d48d5 --- /dev/null +++ b/vnet/vnet/l2/l2_flood.h @@ -0,0 +1,28 @@ +/* + * l2_flood.h : layer 2 flooding + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_l2flood_h +#define included_l2flood_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + +void +l2flood_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index); +#endif diff --git a/vnet/vnet/l2/l2_fwd.c b/vnet/vnet/l2/l2_fwd.c new file mode 100644 index 00000000000..089d4008ea8 --- /dev/null +++ b/vnet/vnet/l2/l2_fwd.c @@ -0,0 +1,446 @@ +/* + * l2_fwd.c : layer 2 forwarding using l2fib + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_bvi.h> +#include <vnet/l2/l2_fwd.h> +#include <vnet/l2/l2_fib.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/sparse_vec.h> + + +typedef struct { + + // Hash table + BVT(clib_bihash) *mac_table; + + // next node index for the L3 input node of each ethertype + next_by_ethertype_t l3_next; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2fwd_main_t; + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u16 bd_index; +} l2fwd_trace_t; + +/* packet trace format function */ +static u8 * format_l2fwd_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2fwd_trace_t * t = va_arg (*args, l2fwd_trace_t *); + + s = format (s, "l2-fwd: sw_if_index %d dst %U src %U bd_index %d", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->bd_index); + return s; +} + +l2fwd_main_t l2fwd_main; + +static vlib_node_registration_t l2fwd_node; + +#define foreach_l2fwd_error \ +_(L2FWD, "L2 forward packets") \ +_(FLOOD, "L2 forward misses") \ +_(HIT, "L2 forward hits") \ +_(BVI_TAGGED, "BVI packet with vlan tag") \ +_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") \ +_(FILTER_DROP, "Filter Mac Drop") \ +_(REFLECT_DROP, "Reflection Drop") + +typedef enum { +#define _(sym,str) L2FWD_ERROR_##sym, + foreach_l2fwd_error +#undef _ + L2FWD_N_ERROR, +} l2fwd_error_t; + +static char * l2fwd_error_strings[] = { +#define _(sym,string) string, + foreach_l2fwd_error +#undef _ +}; + +typedef enum { + L2FWD_NEXT_L2_OUTPUT, + L2FWD_NEXT_FLOOD, + L2FWD_NEXT_DROP, + L2FWD_N_NEXT, +} l2fwd_next_t; + +// Forward one packet based on the mac table lookup result + +static_always_inline void +l2fwd_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + l2fwd_main_t * msm, + vlib_error_main_t * em, + vlib_buffer_t * b0, + u32 sw_if_index0, + l2fib_entry_result_t * result0, + u32 * next0) +{ + if (PREDICT_FALSE (result0->raw == ~0)) { + // lookup miss, so flood + // TODO:replicate packet to each intf in bridge-domain + // For now just drop + if (vnet_buffer(b0)->l2.feature_bitmap & L2INPUT_FEAT_UU_FLOOD) { + *next0 = 
L2FWD_NEXT_FLOOD; + } else { + // Flooding is disabled + b0->error = node->errors[L2FWD_ERROR_FLOOD]; + *next0 = L2FWD_NEXT_DROP; + } + + } else { + + // lookup hit, forward packet +#ifdef COUNTERS + em->counters[node_counter_base_index + L2FWD_ERROR_HIT] += 1; +#endif + + vnet_buffer(b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index; + *next0 = L2FWD_NEXT_L2_OUTPUT; + + // perform reflection check + if (PREDICT_FALSE (sw_if_index0 == result0->fields.sw_if_index)) { + b0->error = node->errors[L2FWD_ERROR_REFLECT_DROP]; + *next0 = L2FWD_NEXT_DROP; + + // perform filter check + } else if (PREDICT_FALSE (result0->fields.filter)) { + b0->error = node->errors[L2FWD_ERROR_FILTER_DROP]; + *next0 = L2FWD_NEXT_DROP; + + // perform BVI check + } else if (PREDICT_FALSE (result0->fields.bvi)) { + u32 rc; + rc = l2_to_bvi (vm, + msm->vnet_main, + b0, + vnet_buffer(b0)->sw_if_index[VLIB_TX], + &msm->l3_next, + next0); + + if (PREDICT_FALSE(rc)) { + if (rc == TO_BVI_ERR_TAGGED) { + b0->error = node->errors[L2FWD_ERROR_BVI_TAGGED]; + *next0 = L2FWD_NEXT_DROP; + } else if (rc == TO_BVI_ERR_ETHERTYPE) { + b0->error = node->errors[L2FWD_ERROR_BVI_ETHERTYPE]; + *next0 = L2FWD_NEXT_DROP; + } + } + } + } +} + + +static uword +l2fwd_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2fwd_next_t next_index; + l2fwd_main_t * msm = &l2fwd_main; + vlib_node_t *n = vlib_get_node (vm, l2fwd_node.index); + CLIB_UNUSED(u32 node_counter_base_index) = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + l2fib_entry_key_t cached_key; + l2fib_entry_result_t cached_result; + + // Clear the one-entry cache in case mac table was updated + cached_key.raw = ~0; + cached_result.raw = ~0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t * h0, * h1; + l2fib_entry_key_t key0, key1; + l2fib_entry_result_t result0, result1; + u32 bucket0, bucket1; + + /* Prefetch next iteration. 
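+             (Editor's note) This dual-packet loop requires n_left_from >= 4
+             so that from[2] and from[3] are valid for the buffer header and
+             packet data prefetches below; unlike l2-flood, no replication
+             context is involved here.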
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2fwd_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2fwd_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->bd_index = vnet_buffer(b1)->l2.bd_index; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + /* process 2 pkts */ +#ifdef COUNTERS + em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 2; +#endif + l2fib_lookup_2 (msm->mac_table, &cached_key, &cached_result, + h0->dst_address, + h1->dst_address, + vnet_buffer(b0)->l2.bd_index, + vnet_buffer(b1)->l2.bd_index, + &key0, // not used + &key1, // not used + &bucket0, // not used + &bucket1, // not used + &result0, + &result1); + l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0, &next0); + l2fwd_process (vm, node, msm, em, b1, sw_if_index1, &result1, &next1); + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t * h0; + l2fib_entry_key_t key0; + l2fib_entry_result_t result0; + u32 bucket0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + l2fwd_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + /* process 1 pkt */ +#ifdef COUNTERS + em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 1; +#endif + l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result, + h0->dst_address, vnet_buffer(b0)->l2.bd_index, + &key0, // not used + &bucket0, // not used + &result0); + l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0, &next0); + + /* verify speculative enqueue, maybe switch current next 
frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2fwd_node,static) = { + .function = l2fwd_node_fn, + .name = "l2-fwd", + .vector_size = sizeof (u32), + .format_trace = format_l2fwd_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2fwd_error_strings), + .error_strings = l2fwd_error_strings, + + .n_next_nodes = L2FWD_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2FWD_NEXT_L2_OUTPUT] = "l2-output", + [L2FWD_NEXT_FLOOD] = "l2-flood", + [L2FWD_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2fwd_init (vlib_main_t *vm) +{ + l2fwd_main_t * mp = &l2fwd_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + /* init the hash table ptr */ + mp->mac_table = get_mac_table(); + + // Initialize the next nodes for each ethertype + next_by_ethertype_init (&mp->l3_next); + + return 0; +} + +VLIB_INIT_FUNCTION (l2fwd_init); + + +// Add the L3 input node for this ethertype to the next nodes structure +void +l2fwd_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index) +{ + l2fwd_main_t * mp = &l2fwd_main; + u32 next_index; + + next_index = vlib_node_add_next (vm, + l2fwd_node.index, + node_index); + + next_by_ethertype_register (&mp->l3_next, type, next_index); +} + + +// set subinterface forward enable/disable +// The CLI format is: +// set interface l2 forward <interface> [disable] +static clib_error_t * +int_fwd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + if (l2input_intf_config(sw_if_index)->xconnect) { + l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_XCONNECT, enable); + } else { + l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_FWD, enable); + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_fwd_cli, static) = { + .path = "set interface l2 forward", + .short_help = "set interface l2 forward <interface> [disable]", + .function = int_fwd, +}; diff --git a/vnet/vnet/l2/l2_fwd.h b/vnet/vnet/l2/l2_fwd.h new file mode 100644 index 00000000000..f08717dfdf8 --- /dev/null +++ b/vnet/vnet/l2/l2_fwd.h @@ -0,0 +1,29 @@ +/* + * l2_fwd.c : layer 2 forwarding using l2fib + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_l2fwd_h +#define included_l2fwd_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + + +void +l2fwd_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index); +#endif diff --git a/vnet/vnet/l2/l2_input.c b/vnet/vnet/l2/l2_input.c new file mode 100644 index 00000000000..34f8a77184f --- /dev/null +++ b/vnet/vnet/l2/l2_input.c @@ -0,0 +1,963 @@ +/* + * l2_input.c : layer 2 input packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_output.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_bvi.h> +#include <vnet/l2/l2_fib.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +extern clib_error_t * +ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags); + +// Feature graph node names +static char * l2input_feat_names[] = { +#define _(sym,name) name, + foreach_l2input_feat +#undef _ +}; + +char **l2input_get_feat_names(void) { + return l2input_feat_names; +} + + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 next_index; + u32 sw_if_index; +} l2input_trace_t; + +/* packet trace format function */ +static u8 * format_l2input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2input_trace_t * t = va_arg (*args, l2input_trace_t *); + + s = format (s, "l2-input: sw_if_index %d dst %U src %U", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src); + return s; +} + +l2input_main_t l2input_main; + +static vlib_node_registration_t l2input_node; + +#define foreach_l2input_error \ +_(L2INPUT, "L2 input packets") \ +_(DROP, "L2 input drops") + +typedef enum { +#define _(sym,str) L2INPUT_ERROR_##sym, + foreach_l2input_error +#undef _ + L2INPUT_N_ERROR, +} l2input_error_t; + +static char * l2input_error_strings[] = { +#define _(sym,string) string, + foreach_l2input_error +#undef _ +}; + +typedef enum { /* */ + L2INPUT_NEXT_LEARN, + L2INPUT_NEXT_FWD, + L2INPUT_NEXT_DROP, + L2INPUT_N_NEXT, +} l2input_next_t; + + +static_always_inline void +classify_and_dispatch (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 cpu_index, + l2input_main_t * msm, + vlib_buffer_t * b0, + u32 *next0) +{ + // Load L2 input feature struct + // Load bridge domain struct + // Parse ethernet header to determine unicast/mcast/broadcast + // take L2 input stat + // classify packet as IP/UDP/TCP, control, other + // mask feature bitmap + // go to first node in bitmap + // Later: optimize VTM + // + // For L2XC, + // set 
tx sw-if-handle + + u8 mcast_dmac; + __attribute__((unused)) u8 l2bcast; + __attribute__((unused)) u8 l2mcast; + __attribute__((unused)) u8 l2_stat_kind; + u16 ethertype; + u8 protocol; + l2_input_config_t *config; + l2_bridge_domain_t *bd_config; + u16 bd_index0; + u32 feature_bitmap; + u32 feat_mask; + ethernet_header_t * h0; + u8 * l3h0; + u32 sw_if_index0; + u8 bvi_flg = 0; + +#define get_u32(addr) ( *((u32 *)(addr)) ) +#define get_u16(addr) ( *((u16 *)(addr)) ) +#define STATS_IF_LAYER2_UCAST_INPUT_CNT 0 +#define STATS_IF_LAYER2_MCAST_INPUT_CNT 1 +#define STATS_IF_LAYER2_BCAST_INPUT_CNT 2 + + // Check for from-BVI processing + // When we come from ethernet-input, TX is ~0 + if (PREDICT_FALSE (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)) { + // Set up for a from-bvi packet + bvi_to_l2 (vm, + msm->vnet_main, + cpu_index, + b0, + vnet_buffer(b0)->sw_if_index[VLIB_TX]); + bvi_flg = 1; + } + + // The RX interface can be changed by bvi_to_l2() + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + l3h0 = (u8 *)h0 + vnet_buffer(b0)->l2.l2_len; + + // Determine L3 packet type. Only need to check the common types. + // Used to filter out features that don't apply to common packets. + ethertype = clib_net_to_host_u16(get_u16(l3h0 - 2)); + if (ethertype == ETHERNET_TYPE_IP4) { + protocol = ((ip4_header_t *)l3h0)->protocol; + if ((protocol == IP_PROTOCOL_UDP) || + (protocol == IP_PROTOCOL_TCP)) { + feat_mask = IP_UDP_TCP_FEAT_MASK; + } else { + feat_mask = IP4_FEAT_MASK; + } + } else if (ethertype == ETHERNET_TYPE_IP6) { + protocol = ((ip6_header_t *)l3h0)->protocol; + // Don't bother checking for extension headers for now + if ((protocol == IP_PROTOCOL_UDP) || + (protocol == IP_PROTOCOL_TCP)) { + feat_mask = IP_UDP_TCP_FEAT_MASK; + } else { + feat_mask = IP6_FEAT_MASK; + } + } else if (ethertype == ETHERNET_TYPE_MPLS_UNICAST) { + feat_mask = IP6_FEAT_MASK; + } else { + // allow all features + feat_mask = ~0; + } + + // determine layer2 kind for stat and mask + mcast_dmac = ethernet_address_cast(h0->dst_address); + l2bcast = 0; + l2mcast = 0; + l2_stat_kind = STATS_IF_LAYER2_UCAST_INPUT_CNT; + if (PREDICT_FALSE (mcast_dmac)) { + u32 *dsthi = (u32 *) &h0->dst_address[0]; + u32 *dstlo = (u32 *) &h0->dst_address[2]; + + // Disable bridge forwarding (flooding will execute instead if not xconnect) + feat_mask &= ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD); + if (ethertype != ETHERNET_TYPE_ARP) // Disable ARP-term for non-ARP packet + feat_mask &= ~(L2INPUT_FEAT_ARP_TERM); + + // dest mac is multicast or broadcast + if ((*dstlo == 0xFFFFFFFF) && (*dsthi == 0xFFFFFFFF)) { + // dest mac == FF:FF:FF:FF:FF:FF + l2_stat_kind = STATS_IF_LAYER2_BCAST_INPUT_CNT; + l2bcast=1; + } else { + l2_stat_kind = STATS_IF_LAYER2_MCAST_INPUT_CNT; + l2mcast=1; + } + } + // TODO: take l2 stat + + // Get config for the input interface + config = vec_elt_at_index(msm->configs, sw_if_index0); + + // Save split horizon group, use 0 for BVI to make sure not dropped + vnet_buffer(b0)->l2.shg = bvi_flg ? 0 : config->shg; + + if (config->xconnect) { + // Set the output interface + vnet_buffer(b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index; + + } else { + + // Do bridge-domain processing + bd_index0 = config->bd_index; + // save BD ID for next feature graph nodes + vnet_buffer(b0)->l2.bd_index = bd_index0; + + // Get config for the bridge domain interface + bd_config = vec_elt_at_index(msm->bd_configs, bd_index0); + + // Process bridge domain feature enables. 
+ // To perform learning/flooding/forwarding, the corresponding bit + // must be enabled in both the input interface config and in the + // bridge domain config. In the bd_bitmap, bits for features other + // than learning/flooding/forwarding should always be set. + feat_mask = feat_mask & bd_config->feature_bitmap; + } + + // mask out features from bitmap using packet type and bd config + feature_bitmap = config->feature_bitmap & feat_mask; + + // save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap; + + // Determine the next node + *next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap); +} + + +static uword +l2input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2input_next_t next_index; + l2input_main_t * msm = &l2input_main; + vlib_node_t *n = vlib_get_node (vm, l2input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3, * p4 , * p5; + u32 sw_if_index2, sw_if_index3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header and packet for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + + // Prefetch the input config for the N+1 loop iteration + // This depends on the buffer header above + sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_RX]; + CLIB_PREFETCH (&msm->configs[sw_if_index2], CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&msm->configs[sw_if_index3], CLIB_CACHE_LINE_BYTES, LOAD); + + // Don't bother prefetching the bridge-domain config (which + // depends on the input config above). Only a small number of + // bridge domains are expected. Plus the structure is small + // and several fit in a cache line. 
+ } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) { + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + if (b0->flags & VLIB_BUFFER_IS_TRACED) { + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + l2input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) { + ethernet_header_t * h1 = vlib_buffer_get_current (b1); + l2input_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + em->counters[node_counter_base_index + L2INPUT_ERROR_L2INPUT] += 2; + + classify_and_dispatch (vm, + node, + cpu_index, + msm, + b0, + &next0); + + classify_and_dispatch (vm, + node, + cpu_index, + msm, + b1, + &next1); + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + l2input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + em->counters[node_counter_base_index + L2INPUT_ERROR_L2INPUT] += 1; + + classify_and_dispatch (vm, + node, + cpu_index, + msm, + b0, + &next0); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2input_node,static) = { + .function = l2input_node_fn, + .name = "l2-input", + .vector_size = sizeof (u32), + .format_trace = format_l2input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2input_error_strings), + .error_strings = l2input_error_strings, + + .n_next_nodes = L2INPUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2INPUT_NEXT_LEARN] = "l2-learn", + [L2INPUT_NEXT_FWD] = "l2-fwd", + [L2INPUT_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2input_init (vlib_main_t *vm) +{ + l2input_main_t * mp = &l2input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Get packets RX'd from L2 interfaces + ethernet_register_l2_input (vm, l2input_node.index); + + // Create the config vector + vec_validate(mp->configs, 
100); + // create 100 sw interface entries and zero them + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2input_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2input_init); + + +// Get a pointer to the config for the given interface +l2_input_config_t * l2input_intf_config (u32 sw_if_index) +{ + l2input_main_t * mp = &l2input_main; + + vec_validate(mp->configs, sw_if_index); + return vec_elt_at_index(mp->configs, sw_if_index); +} + +// Enable (or disable) the feature in the bitmap for the given interface +u32 l2input_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, + u32 enable) +{ + l2input_main_t * mp = &l2input_main; + l2_input_config_t *config; + + vec_validate(mp->configs, sw_if_index); + config = vec_elt_at_index(mp->configs, sw_if_index); + + if (enable) { + config->feature_bitmap |= feature_bitmap; + } else { + config->feature_bitmap &= ~feature_bitmap; + } + + return config->feature_bitmap; +} + + + +// Set the subinterface to run in l2 or l3 mode. +// for L3 mode, just the sw_if_index is specified +// for bridged mode, the bd id and bvi flag are also specified +// for xconnect mode, the peer sw_if_index is also specified +// Return 0 if ok, or non-0 if there was an error + +u32 set_int_l2_mode (vlib_main_t * vm, + vnet_main_t * vnet_main, + u32 mode, + u32 sw_if_index, + u32 bd_index, // for bridged interface + u32 bvi, // the bridged interface is the BVI + u32 shg, // the bridged interface's split horizon group + u32 xc_sw_if_index) // peer interface for xconnect +{ + l2input_main_t * mp = &l2input_main; + vnet_main_t * vnm = vnet_get_main(); + vnet_hw_interface_t * hi; + l2_output_config_t * out_config; + l2_input_config_t * config; + l2_bridge_domain_t * bd_config; + l2_flood_member_t member; + u64 mac; + i32 l2_if_adjust = 0; + + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + + vec_validate(mp->configs, sw_if_index); + config = vec_elt_at_index(mp->configs, sw_if_index); + + if (config->bridge) { + // Interface is already in bridge mode. Undo the existing config. 
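+    // (Editor's note) "Undo" here means: remove the interface from the
+    // bridge domain's flood vector, and if it was the BVI, clear the BVI
+    // binding, restore the interface's saved output node, and delete the
+    // static l2fib entry that was installed for the BVI's MAC address.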
+ bd_config = vec_elt_at_index(mp->bd_configs, config->bd_index); + + // remove interface from flood vector + bd_remove_member (bd_config, sw_if_index); + + // undo any BVI-related config + if (bd_config->bvi_sw_if_index == sw_if_index) { + bd_config->bvi_sw_if_index = ~0; + config->bvi = 0; + + // restore output node + hi->output_node_index = bd_config->saved_bvi_output_node_index; + + // delete the l2fib entry for the bvi interface + mac = *((u64 *)hi->hw_address); + l2fib_del_entry (mac, config->bd_index); + } + l2_if_adjust--; + } else if (config->xconnect) { + l2_if_adjust--; + } + + // Initialize the l2-input configuration for the interface + if (mode == MODE_L3) { + config->xconnect = 0; + config->bridge = 0; + config->shg = 0; + config->bd_index = 0; + config->feature_bitmap = L2INPUT_FEAT_DROP; + } else if (mode == MODE_L2_CLASSIFY) { + config->xconnect = 1; + config->bridge = 0; + config->output_sw_if_index = xc_sw_if_index; + + // Make sure last-chance drop is configured + config->feature_bitmap |= L2INPUT_FEAT_DROP | L2INPUT_FEAT_CLASSIFY; + + // Make sure bridging features are disabled + config->feature_bitmap &= + ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD); + shg = 0; // not used in xconnect + + // Insure all packets go to ethernet-input + ethernet_set_rx_redirect (vnet_main, hi, 1); + } else { + + if (mode == MODE_L2_BRIDGE) { + /* + * Remove a check that the interface must be an Ethernet. + * Specifically so we can bridge to L3 tunnel interfaces. + * Here's the check: + * if (hi->hw_class_index != ethernet_hw_interface_class.index) + * + */ + if (!hi) + return MODE_ERROR_ETH; // non-ethernet + + config->xconnect = 0; + config->bridge = 1; + config->bd_index = bd_index; + + // Enable forwarding, flooding, learning and ARP termination by default + // (note that ARP term is disabled on BD feature bitmap by default) + config->feature_bitmap |= L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD | + L2INPUT_FEAT_FLOOD | L2INPUT_FEAT_LEARN | L2INPUT_FEAT_ARP_TERM; + + // Make sure last-chance drop is configured + config->feature_bitmap |= L2INPUT_FEAT_DROP; + + // Make sure xconnect is disabled + config->feature_bitmap &= ~L2INPUT_FEAT_XCONNECT; + + // Set up bridge domain + vec_validate(mp->bd_configs, bd_index); + bd_config = vec_elt_at_index(mp->bd_configs, bd_index); + bd_validate (bd_config); + + // TODO: think: add l2fib entry even for non-bvi interface? + + // Do BVI interface initializations + if (bvi) { + // insure BD has no bvi interface (or replace that one with this??) + if (bd_config->bvi_sw_if_index != ~0) { + return MODE_ERROR_BVI_DEF; // bd already has a bvi interface + } + bd_config->bvi_sw_if_index = sw_if_index; + config->bvi = 1; + + // make BVI outputs go to l2-input + bd_config->saved_bvi_output_node_index = hi->output_node_index; + hi->output_node_index = l2input_node.index; + + // create the l2fib entry for the bvi interface + mac = *((u64 *)hi->hw_address); + l2fib_add_entry (mac, bd_index, sw_if_index, 1, 0, 1); // static + bvi + + // Disable learning by default. no use since l2fib entry is static. + config->feature_bitmap &= ~L2INPUT_FEAT_LEARN; + + // Add BVI to arp_input_next_index_by_hw_if_index table so arp-input + // node can send out ARP response via BVI to BD + ethernet_arp_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0); + + } + + // Add interface to bridge-domain flood vector + member.sw_if_index = sw_if_index; + member.flags = bvi ? 
L2_FLOOD_MEMBER_BVI : L2_FLOOD_MEMBER_NORMAL; + member.shg = shg; + bd_add_member (bd_config, &member); + + } else { + config->xconnect = 1; + config->bridge = 0; + config->output_sw_if_index = xc_sw_if_index; + + // Make sure last-chance drop is configured + config->feature_bitmap |= L2INPUT_FEAT_DROP; + + // Make sure bridging features are disabled + config->feature_bitmap &= ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD); + + config->feature_bitmap |= L2INPUT_FEAT_XCONNECT; + shg = 0; // not used in xconnect + } + + // set up split-horizon group + config->shg = shg; + out_config = l2output_intf_config (sw_if_index); + out_config->shg = shg; + + // Test: remove this when non-IP features can be configured. + // Enable a non-IP feature to test IP feature masking + // config->feature_bitmap |= L2INPUT_FEAT_CTRL_PKT; + + l2_if_adjust++; + } + + // Adjust count of L2 interfaces + hi->l2_if_count += l2_if_adjust; + + if (hi->hw_class_index == ethernet_hw_interface_class.index) { + if ((hi->l2_if_count == 1) && (l2_if_adjust == 1)) { + // Just added first L2 interface on this port + + // Set promiscuous mode on the l2 interface + ethernet_set_flags (vnet_main, hi->hw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + + // Insure all packets go to ethernet-input + ethernet_set_rx_redirect (vnet_main, hi, 1); + + } else if ((hi->l2_if_count == 0) && (l2_if_adjust == -1)) { + // Just removed only L2 subinterface on this port + + // Disable promiscuous mode on the l2 interface + ethernet_set_flags (vnet_main, hi->hw_if_index, 0); + + // Allow ip packets to go directly to ip4-input etc + ethernet_set_rx_redirect (vnet_main, hi, 0); + } + } + + // Set up the L2/L3 flag in the interface parsing tables + ethernet_sw_interface_set_l2_mode(vnm, sw_if_index, (mode!=MODE_L3)); + + return 0; +} + +// set subinterface in bridging mode with a bridge-domain ID +// The CLI format is: +// set interface l2 bridge <interface> <bd> [bvi] [split-horizon-group] +static clib_error_t * +int_l2_bridge (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 sw_if_index; + u32 bvi; + u32 rc; + u32 shg; + + if (! 
unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (!unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expected bridge domain ID `%U'", + format_unformat_error, input); + goto done; + } + + bd_index = bd_find_or_add_bd_index (&bd_main, bd_id); + + // optional bvi + bvi = unformat (input, "bvi"); + + // optional split horizon group + shg = 0; + (void) unformat (input, "%d", &shg); + + // set the interface mode + if ((rc = set_int_l2_mode(vm, vnm, MODE_L2_BRIDGE, sw_if_index, bd_index, bvi, shg, 0))) { + if (rc == MODE_ERROR_ETH) { + error = clib_error_return (0, "bridged interface must be ethernet", + format_unformat_error, input); + } else if (rc == MODE_ERROR_BVI_DEF) { + error = clib_error_return (0, "bridge-domain already has a bvi interface", + format_unformat_error, input); + } else { + error = clib_error_return (0, "invalid configuration for interface", + format_unformat_error, input); + } + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = { + .path = "set interface l2 bridge", + .short_help = "set interface to L2 bridging mode in <bridge-domain ID> [bvi] [shg]", + .function = int_l2_bridge, +}; + +// set subinterface in xconnect mode with another interface +// The CLI format is: +// set interface l2 xconnect <interface> <peer interface> +static clib_error_t * +int_l2_xc (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 xc_sw_if_index; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &xc_sw_if_index)) + { + error = clib_error_return (0, "unknown peer interface `%U'", + format_unformat_error, input); + goto done; + } + + // set the interface mode + if (set_int_l2_mode(vm, vnm, MODE_L2_XC, sw_if_index, 0, 0, 0, xc_sw_if_index)) { + error = clib_error_return (0, "invalid configuration for interface", + format_unformat_error, input); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_xc_cli, static) = { + .path = "set interface l2 xconnect", + .short_help = "set interface to L2 cross-connect mode with <peer interface>", + .function = int_l2_xc, +}; + +// set subinterface in L3 mode +// The CLI format is: +// set interface l3 <interface> +static clib_error_t * +int_l3 (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + // set the interface mode + if (set_int_l2_mode(vm, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0)) { + error = clib_error_return (0, "invalid configuration for interface", + format_unformat_error, input); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l3_cli, static) = { + .path = "set interface l3", + .short_help = "set interface to L3 mode", + .function = int_l3, +}; + +// The CLI format is: +// show mode [<if-name1> <if-name2> ...] 
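+// (Editor's note) Sample output, using hypothetical interface names; each
+// line is printed as "<mode> <interface> <args>" by the handler below:
+//   l2 bridge GigabitEthernet2/1/0 bd_id 13 shg 0
+//   l2 xconnect GigabitEthernet2/2/0 GigabitEthernet2/2/1
+//   l3 GigabitEthernet2/3/0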
+static clib_error_t * +show_int_mode (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + char * mode; + u8 * args; + vnet_interface_main_t * im = &vnm->interface_main; + vnet_sw_interface_t * si, * sis = 0; + l2input_main_t * mp = &l2input_main; + l2_input_config_t * config; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + u32 sw_if_index; + + /* See if user wants to show specific interface */ + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + si = pool_elt_at_index (im->sw_interfaces, sw_if_index); + vec_add1 (sis, si[0]); + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + + } + + if (vec_len (sis) == 0) /* Get all interfaces */ + { + /* Gather interfaces. */ + sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); + _vec_len (sis) = 0; + pool_foreach (si, im->sw_interfaces, ({ vec_add1 (sis, si[0]); })); + } + + vec_foreach (si, sis) + { + vec_validate(mp->configs, si->sw_if_index); + config = vec_elt_at_index(mp->configs, si->sw_if_index); + if (config->bridge) { + u32 bd_id; + mode = "l2 bridge"; + bd_id = l2input_main.bd_configs[config->bd_index].bd_id; + + args = format (0, "bd_id %d%s%d", bd_id, + config->bvi ? " bvi shg " : " shg ", config->shg); + } else if (config->xconnect) { + mode = "l2 xconnect"; + args = format (0, "%U", + format_vnet_sw_if_index_name, + vnm, config->output_sw_if_index); + } else { + mode = "l3"; + args = format (0, " "); + } + vlib_cli_output (vm, "%s %U %v\n", + mode, + format_vnet_sw_if_index_name, + vnm, si->sw_if_index, + args); + vec_free (args); + } + +done: + vec_free (sis); + + return error; +} + +VLIB_CLI_COMMAND (show_l2_mode, static) = { + .path = "show mode", + .short_help = "show mode [<if-name1> <if-name2> ...]", + .function = show_int_mode, +}; + +#define foreach_l2_init_function \ +_(feat_bitmap_drop_init) \ +_(l2fib_init) \ +_(l2_classify_init) \ +_(l2bd_init) \ +_(l2fwd_init) \ +_(l2_inacl_init) \ +_(l2input_init) \ +_(l2_vtr_init) \ +_(l2_invtr_init) \ +_(l2_efp_filter_init) \ +_(l2learn_init) \ +_(l2flood_init) \ +_(l2_outacl_init) \ +_(l2output_init) \ +_(l2_patch_init) \ +_(l2_xcrw_init) + +clib_error_t *l2_init (vlib_main_t * vm) +{ + clib_error_t * error; + +#define _(a) do { \ + if ((error = vlib_call_init_function (vm, a))) return error; } \ +while (0); + foreach_l2_init_function; +#undef _ + return 0; +} + +VLIB_INIT_FUNCTION (l2_init); diff --git a/vnet/vnet/l2/l2_input.h b/vnet/vnet/l2/l2_input.h new file mode 100644 index 00000000000..e650162b593 --- /dev/null +++ b/vnet/vnet/l2/l2_input.h @@ -0,0 +1,279 @@ +/* + * l2_input.h : layer 2 input packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_vnet_l2_input_h +#define included_vnet_l2_input_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/l2_bd.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip.h> + +// Per-subinterface L2 feature configuration + +typedef struct { + + union { + u16 bd_index; // bridge domain id + u32 output_sw_if_index; // for xconnect + }; + + // Interface mode. If both are 0, this interface is in L3 mode + u8 xconnect; + u8 bridge; + + // this is the bvi interface for the bridge-domain + u8 bvi; + + // config for which input features are configured on this interface + u32 feature_bitmap; + + // some of these flags are also in the feature bitmap + u8 learn_enable; + u8 fwd_enable; + u8 flood_enable; + + // split horizon group + u8 shg; + +} l2_input_config_t; + + +typedef struct { + + // Next nodes for the feature bitmap + u32 feat_next_node_index[32]; + + /* config vector indexed by sw_if_index */ + l2_input_config_t *configs; + + /* bridge domain config vector indexed by BD ID */ + l2_bridge_domain_t *bd_configs; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2input_main_t; + +extern l2input_main_t l2input_main; + +static_always_inline l2_bridge_domain_t * +l2input_bd_config_from_index (l2input_main_t * l2im, u32 bd_index) +{ + l2_bridge_domain_t * bd_config; + + bd_config = vec_elt_at_index (l2im->bd_configs, bd_index); + return bd_is_valid (bd_config) ? bd_config : NULL; +} + +// L2 input features + +// Mappings from feature ID to graph node name +#define foreach_l2input_feat \ + _(DROP, "feature-bitmap-drop") \ + _(CLASSIFY, "l2-classify") \ + _(XCONNECT, "l2-output") \ + _(IPIW, "feature-bitmap-drop") \ + _(FLOOD, "l2-flood") \ + _(ARP_TERM, "arp-term-l2bd") \ + _(UU_FLOOD, "l2-flood") \ + _(FWD, "l2-fwd") \ + _(LEARN, "l2-learn") \ + _(VTR, "l2-input-vtr") \ + _(VPATH, "vpath-input-l2") \ + _(CTRL_PKT, "feature-bitmap-drop") \ + _(L2PT, "feature-bitmap-drop") \ + _(IGMP_SNOOP, "feature-bitmap-drop") \ + _(MLD_SNOOP, "feature-bitmap-drop") \ + _(DHCP_SNOOP, "feature-bitmap-drop") \ + _(DAI, "feature-bitmap-drop") \ + _(IPSG, "feature-bitmap-drop") \ + _(ACL, "l2-input-acl") \ + _(QOS, "feature-bitmap-drop") \ + _(CFM, "feature-bitmap-drop") \ + _(SPAN, "feature-bitmap-drop") + +// Feature bitmap positions +typedef enum { +#define _(sym,str) L2INPUT_FEAT_##sym##_BIT, + foreach_l2input_feat +#undef _ + L2INPUT_N_FEAT, +} l2input_feat_t; + +// Feature bit masks +typedef enum { +#define _(sym,str) L2INPUT_FEAT_##sym = (1<<L2INPUT_FEAT_##sym##_BIT), + foreach_l2input_feat +#undef _ +} l2input_feat_masks_t; + +// Return an array of strings containing graph node names of each feature +char **l2input_get_feat_names(void); + + +static_always_inline u8 bd_feature_flood (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD) == + L2INPUT_FEAT_FLOOD); +} + +static_always_inline u8 bd_feature_uu_flood (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD) == + L2INPUT_FEAT_UU_FLOOD); +} + +static_always_inline u8 bd_feature_forward (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_FWD) == + L2INPUT_FEAT_FWD); +} + +static_always_inline u8 bd_feature_learn (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_LEARN) == + L2INPUT_FEAT_LEARN); +} + +static_always_inline u8 bd_feature_arp_term (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & 
L2INPUT_FEAT_ARP_TERM) == + L2INPUT_FEAT_ARP_TERM); +} + +// Masks for eliminating features that do not apply to a packet + +#define IP4_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \ + L2INPUT_FEAT_MLD_SNOOP | \ + L2INPUT_FEAT_L2PT | \ + L2INPUT_FEAT_CFM | \ + L2INPUT_FEAT_DAI) + +#define IP6_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \ + L2INPUT_FEAT_IGMP_SNOOP | \ + L2INPUT_FEAT_L2PT | \ + L2INPUT_FEAT_CFM | \ + L2INPUT_FEAT_DAI) + +#define IP_UDP_TCP_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \ + L2INPUT_FEAT_L2PT | \ + L2INPUT_FEAT_IGMP_SNOOP | \ + L2INPUT_FEAT_MLD_SNOOP | \ + L2INPUT_FEAT_DHCP_SNOOP | \ + L2INPUT_FEAT_CFM | \ + L2INPUT_FEAT_DAI) + +#define MPLS_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \ + L2INPUT_FEAT_L2PT | \ + L2INPUT_FEAT_IGMP_SNOOP | \ + L2INPUT_FEAT_MLD_SNOOP | \ + L2INPUT_FEAT_DHCP_SNOOP | \ + L2INPUT_FEAT_CFM | \ + L2INPUT_FEAT_DAI) + + +// Get a pointer to the config for the given interface +l2_input_config_t * l2input_intf_config (u32 sw_if_index); + +// Enable (or disable) the feature in the bitmap for the given interface +u32 l2input_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, + u32 enable); + + +#define MODE_L3 0 +#define MODE_L2_BRIDGE 1 +#define MODE_L2_XC 2 +#define MODE_L2_CLASSIFY 3 + +#define MODE_ERROR_ETH 1 +#define MODE_ERROR_BVI_DEF 2 + +u32 set_int_l2_mode (vlib_main_t * vm, + vnet_main_t * vnet_main, + u32 mode, + u32 sw_if_index, + u32 bd_index, + u32 bvi, + u32 shg, + u32 xc_sw_if_index); + +static inline void +vnet_update_l2_len (vlib_buffer_t * b) +{ + ethernet_header_t * eth; + u16 ethertype; + + /* point at currrent l2 hdr */ + eth = vlib_buffer_get_current (b); + + /* + * l2-output pays no attention to this + * but the tag push/pop code on an l2 subif needs it. + * + * Determine l2 header len, check for up to 2 vlans + */ + vnet_buffer(b)->l2.l2_len = sizeof(ethernet_header_t); + ethertype = clib_net_to_host_u16(eth->type); + if ((ethertype == ETHERNET_TYPE_VLAN) || + (ethertype == ETHERNET_TYPE_DOT1AD) || + (ethertype == ETHERNET_TYPE_VLAN_9100) || + (ethertype == ETHERNET_TYPE_VLAN_9200)) { + ethernet_vlan_header_t * vlan; + vnet_buffer(b)->l2.l2_len += sizeof (*vlan); + vlan = (void *) (eth+1); + ethertype = clib_net_to_host_u16 (vlan->type); + if (ethertype == ETHERNET_TYPE_VLAN) { + vnet_buffer(b)->l2.l2_len += sizeof (*vlan); + } + } +} + +/* + * Compute flow hash of an ethernet packet, use 5-tuple hash if L3 packet + * is ip4 or ip6. Otherwise hash on smac/dmac/etype. + * The vlib buffer current pointer is expected to be at ethernet header + * and vnet l2.l2_len is exppected to be setup already. 
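 *
 * As an illustrative usage sketch (not part of the original comment): a graph
 * node that needs a load-balancing hash for an L2 frame would typically fix up
 * the l2 header length first and then call this helper, e.g.
 *
 *   vnet_update_l2_len (b0);
 *   hash0 = vnet_l2_compute_flow_hash (b0);
 *
 * where b0 is a vlib_buffer_t * whose current pointer sits at the ethernet
 * header; both helpers are defined in this header file.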
+ */ +static inline u32 vnet_l2_compute_flow_hash (vlib_buffer_t *b) +{ + ethernet_header_t * eh = vlib_buffer_get_current(b); + u8 * l3h = (u8 *)eh + vnet_buffer(b)->l2.l2_len; + u16 ethertype = clib_net_to_host_u16(*(u16 *)(l3h - 2)); + + if (ethertype == ETHERNET_TYPE_IP4) + return ip4_compute_flow_hash((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT); + else if (ethertype == ETHERNET_TYPE_IP6) + return ip6_compute_flow_hash((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT); + else + { + u32 a, b, c; + u32 * ap = (u32 *) &eh->dst_address[2]; + u32 * bp = (u32 *) &eh->src_address[2]; + a = * ap; + b = * bp; + c = ethertype; + hash_v3_mix32 (a, b, c); + hash_v3_finalize32 (a, b, c); + return c; + } +} + +#endif + diff --git a/vnet/vnet/l2/l2_input_acl.c b/vnet/vnet/l2/l2_input_acl.c new file mode 100644 index 00000000000..77fa8944e9f --- /dev/null +++ b/vnet/vnet/l2/l2_input_acl.c @@ -0,0 +1,427 @@ +/* + * l2_input_acl.c : layer 2 input acl processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#include <vnet/classify/vnet_classify.h> +#include <vnet/classify/input_acl.h> + +typedef struct { + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_inacl_main_t; + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 table_index; + u32 offset; +} l2_inacl_trace_t; + +/* packet trace format function */ +static u8 * format_l2_inacl_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_inacl_trace_t * t = va_arg (*args, l2_inacl_trace_t *); + + s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d", + t->sw_if_index, t->next_index, t->table_index, t->offset); + return s; +} + +l2_inacl_main_t l2_inacl_main; + +static vlib_node_registration_t l2_inacl_node; + +#define foreach_l2_inacl_error \ +_(NONE, "valid input ACL packets") \ +_(MISS, "input ACL misses") \ +_(HIT, "input ACL hits") \ +_(CHAIN_HIT, "input ACL hits after chain walk") \ +_(TABLE_MISS, "input ACL table-miss drops") \ +_(SESSION_DENY, "input ACL session deny drops") + + +typedef enum { +#define _(sym,str) L2_INACL_ERROR_##sym, + foreach_l2_inacl_error +#undef _ + L2_INACL_N_ERROR, +} l2_inacl_error_t; + +static char * l2_inacl_error_strings[] = { +#define _(sym,string) string, + foreach_l2_inacl_error +#undef _ +}; + +static uword +l2_inacl_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t 
* frame) +{ + u32 n_left_from, * from, * to_next; + acl_next_index_t next_index; + l2_inacl_main_t * msm = &l2_inacl_main; + input_acl_main_t * am = &input_acl_main; + vnet_classify_main_t * vcm = am->vnet_classify_main; + input_acl_table_id_t tid = INPUT_ACL_TABLE_L2; + f64 now = vlib_time_now (vm); + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + /* First pass: compute hashes */ + while (n_left_from > 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, bi1; + u8 * h0, * h1; + u32 sw_if_index0, sw_if_index1; + u32 table_index0, table_index1; + vnet_classify_table_t * t0, * t1; + + /* prefetch next iteration */ + { + vlib_buffer_t * p1, * p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = b1->data; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + table_index1 = am->classify_table_index_by_sw_if_index[tid][sw_if_index1]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + vnet_buffer(b1)->l2_classify.hash = + vnet_classify_hash_packet (t1, (u8 *) h1); + + vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + + vnet_buffer(b1)->l2_classify.table_index = table_index1; + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t * b0; + u32 bi0; + u8 * h0; + u32 sw_if_index0; + u32 table_index0; + vnet_classify_table_t * t0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... 
*/ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = ACL_NEXT_INDEX_DENY; + u32 table_index0; + vnet_classify_table_t * t0; + vnet_classify_entry_t * e0; + u64 hash0; + u8 * h0; + u8 error0; + + /* Stride 3 seems to work best */ + if (PREDICT_TRUE (n_left_from > 3)) + { + vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]); + vnet_classify_table_t * tp1; + u32 table_index1; + u64 phash1; + + table_index1 = vnet_buffer(p1)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index1 != ~0)) + { + tp1 = pool_elt_at_index (vcm->tables, table_index1); + phash1 = vnet_buffer(p1)->l2_classify.hash; + vnet_classify_prefetch_entry (tp1, phash1); + } + } + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + table_index0 = vnet_buffer(b0)->l2_classify.table_index; + e0 = 0; + t0 = 0; + + /* Feature bitmap update */ + vnet_buffer(b0)->l2.feature_bitmap &= ~L2INPUT_FEAT_ACL; + + /* Determine the next node */ + next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + vnet_buffer(b0)->l2.feature_bitmap); + + if (PREDICT_TRUE(table_index0 != ~0)) + { + hash0 = vnet_buffer(b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, + now); + if (e0) + { + vlib_buffer_advance (b0, e0->advance); + + next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)? + e0->next_index:next0; + + hits++; + + error0 = (next0 == ACL_NEXT_INDEX_DENY)? + L2_INACL_ERROR_SESSION_DENY:L2_INACL_ERROR_NONE; + b0->error = node->errors[error0]; + } + else + { + while (1) + { + if (PREDICT_TRUE(t0->next_table_index != ~0)) + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + else + { + next0 = (t0->miss_next_index < ACL_NEXT_INDEX_N_NEXT)? + t0->miss_next_index:next0; + + misses++; + + error0 = (next0 == ACL_NEXT_INDEX_DENY)? + L2_INACL_ERROR_TABLE_MISS:L2_INACL_ERROR_NONE; + b0->error = node->errors[error0]; + break; + } + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry + (t0, (u8 *) h0, hash0, now); + if (e0) + { + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)? + e0->next_index:next0; + hits++; + chain_hits++; + + error0 = (next0 == ACL_NEXT_INDEX_DENY)? + L2_INACL_ERROR_SESSION_DENY:L2_INACL_ERROR_NONE; + b0->error = node->errors[error0]; + break; + } + } + } + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_inacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + t->table_index = t0 ? t0 - vcm->tables : ~0; + t->offset = e0 ? 
vnet_classify_get_offset (t0, e0): ~0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + L2_INACL_ERROR_MISS, + misses); + vlib_node_increment_counter (vm, node->node_index, + L2_INACL_ERROR_HIT, + hits); + vlib_node_increment_counter (vm, node->node_index, + L2_INACL_ERROR_CHAIN_HIT, + chain_hits); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2_inacl_node,static) = { + .function = l2_inacl_node_fn, + .name = "l2-input-acl", + .vector_size = sizeof (u32), + .format_trace = format_l2_inacl_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_inacl_error_strings), + .error_strings = l2_inacl_error_strings, + + .n_next_nodes = ACL_NEXT_INDEX_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [ACL_NEXT_INDEX_DENY] = "error-drop", + }, +}; + +clib_error_t *l2_inacl_init (vlib_main_t *vm) +{ + l2_inacl_main_t * mp = &l2_inacl_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_inacl_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_inacl_init); + + +// set subinterface inacl enable/disable +// The CLI format is: +// set interface acl input <interface> [disable] +static clib_error_t * +int_l2_inacl (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_ACL, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_inacl_cli, static) = { + .path = "set interface acl input", + .short_help = "set interface acl input <interface> [disable]", + .function = int_l2_inacl, +}; diff --git a/vnet/vnet/l2/l2_input_vtr.c b/vnet/vnet/l2/l2_input_vtr.c new file mode 100644 index 00000000000..d07a0287d04 --- /dev/null +++ b/vnet/vnet/l2/l2_input_vtr.c @@ -0,0 +1,314 @@ +/* + * l2_input_vtr.c : layer 2 input vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/l2/l2_input_vtr.h> +#include <vnet/l2/l2_output.h> + +#include <vppinfra/error.h> +#include <vppinfra/cache.h> + + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u8 raw[12]; // raw data (vlans) + u32 sw_if_index; +} l2_invtr_trace_t; + +/* packet trace format function */ +static u8 * format_l2_invtr_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_invtr_trace_t * t = va_arg (*args, l2_invtr_trace_t *); + + s = format (s, "l2-input-vtr: sw_if_index %d dst %U src %U data " + "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], t->raw[5], + t->raw[6], t->raw[7], t->raw[8], t->raw[9], t->raw[10], t->raw[11]); + return s; +} + +l2_invtr_main_t l2_invtr_main; + +static vlib_node_registration_t l2_invtr_node; + +#define foreach_l2_invtr_error \ +_(L2_INVTR, "L2 inverter packets") \ +_(DROP, "L2 input tag rewrite drops") + +typedef enum { +#define _(sym,str) L2_INVTR_ERROR_##sym, + foreach_l2_invtr_error +#undef _ + L2_INVTR_N_ERROR, +} l2_invtr_error_t; + +static char * l2_invtr_error_strings[] = { +#define _(sym,string) string, + foreach_l2_invtr_error +#undef _ +}; + +typedef enum { + L2_INVTR_NEXT_DROP, + L2_INVTR_N_NEXT, +} l2_invtr_next_t; + + +static uword +l2_invtr_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_invtr_next_t next_index; + l2_invtr_main_t * msm = &l2_invtr_main; + // vlib_node_t *n = vlib_get_node (vm, l2_invtr_node.index); + // u32 node_counter_base_index = n->error_heap_index; + // vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + u32 feature_bitmap0, feature_bitmap1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3, * p4, * p5; + u32 sw_if_index2, sw_if_index3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header and packet for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + + // Prefetch the input config for the N+1 loop iteration + // This depends on the buffer header above + sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_RX]; + CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + // process 2 packets + // em->counters[node_counter_base_index + L2_INVTR_ERROR_L2_INVTR] += 2; + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR; + feature_bitmap1 = vnet_buffer(b1)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR; + + // save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap0; + vnet_buffer(b1)->l2.feature_bitmap = feature_bitmap1; + + // Determine the next node + next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap0); + next1 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap1); + + // perform the tag rewrite on two packets + if (l2_vtr_process(b0, &(vec_elt_at_index(l2output_main.configs, sw_if_index0)->input_vtr))) { + // Drop packet + next0 = L2_INVTR_NEXT_DROP; + b0->error = node->errors[L2_INVTR_ERROR_DROP]; + } + if (l2_vtr_process(b1, &(vec_elt_at_index(l2output_main.configs, sw_if_index1)->input_vtr))) { + // Drop packet + next1 = L2_INVTR_NEXT_DROP; + b1->error = node->errors[L2_INVTR_ERROR_DROP]; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) { + if (b0->flags & VLIB_BUFFER_IS_TRACED) { + l2_invtr_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + memcpy(t->raw, &h0->type, sizeof(t->raw)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) { + l2_invtr_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + ethernet_header_t * h1 = vlib_buffer_get_current (b1); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + memcpy(t->raw, &h1->type, sizeof(t->raw)); + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + 
vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + u32 feature_bitmap0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + // process 1 packet + // em->counters[node_counter_base_index + L2_INVTR_ERROR_L2_INVTR] += 1; + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR; + + // save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap0; + + // Determine the next node + next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap0); + + // perform the tag rewrite on one packet + if (l2_vtr_process(b0, &(vec_elt_at_index(l2output_main.configs, sw_if_index0)->input_vtr))) { + // Drop packet + next0 = L2_INVTR_NEXT_DROP; + b0->error = node->errors[L2_INVTR_ERROR_DROP]; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + l2_invtr_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + memcpy(t->raw, &h0->type, sizeof(t->raw)); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2_invtr_node,static) = { + .function = l2_invtr_node_fn, + .name = "l2-input-vtr", + .vector_size = sizeof (u32), + .format_trace = format_l2_invtr_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_invtr_error_strings), + .error_strings = l2_invtr_error_strings, + + .n_next_nodes = L2_INVTR_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_INVTR_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2_invtr_init (vlib_main_t *vm) +{ + l2_invtr_main_t * mp = &l2_invtr_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_invtr_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_invtr_init); + diff --git a/vnet/vnet/l2/l2_input_vtr.h b/vnet/vnet/l2/l2_input_vtr.h new file mode 100644 index 00000000000..57c8e409dea --- /dev/null +++ b/vnet/vnet/l2/l2_input_vtr.h @@ -0,0 +1,43 @@ +/* + * l2_input_vtr.h : layer 2 input vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_vnet_l2_input_vtr_h +#define included_vnet_l2_input_vtr_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> + + +typedef struct { + + // The input vtr data is located in l2_output_config_t because + // the same config data is used for the egress EFP Filter check. + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_invtr_main_t; + +extern l2_invtr_main_t l2_invtr_main; + +#endif // included_vnet_l2_input_vtr_h + diff --git a/vnet/vnet/l2/l2_learn.c b/vnet/vnet/l2/l2_learn.c new file mode 100644 index 00000000000..29315bedc98 --- /dev/null +++ b/vnet/vnet/l2/l2_learn.c @@ -0,0 +1,504 @@ +/* + * l2_learn.c : layer 2 learning using l2fib + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_learn.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> + +/* + * Ethernet bridge learning + * + * Populate the mac table with entries mapping the packet's source mac + bridge + * domain ID to the input sw_if_index. + * + * Note that learning and forwarding are separate graph nodes. This means that + * for a set of packets, all learning is performed first, then all nodes are + * forwarded. The forwarding is done based on the end-state of the mac table, + * instead of the state after each packet. Thus the forwarding results could + * differ in certain cases (mac move tests), but this not expected to cause + * problems in real-world networks. It is much simpler to separate learning + * and forwarding into separate nodes. 
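 *
 * For example (illustrative): if a source MAC moves from interface A to
 * interface B within one vector, every packet in that same vector destined to
 * that MAC is forwarded toward B, the end-state of the table, including
 * packets that were received before the move was learned.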
+ */ + + +typedef struct { + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u16 bd_index; +} l2learn_trace_t; + + +/* packet trace format function */ +static u8 * format_l2learn_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2learn_trace_t * t = va_arg (*args, l2learn_trace_t *); + + s = format (s, "l2-learn: sw_if_index %d dst %U src %U bd_index %d", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->bd_index); + return s; +} + +static vlib_node_registration_t l2learn_node; + +#define foreach_l2learn_error \ +_(L2LEARN, "L2 learn packets") \ +_(MISS, "L2 learn misses") \ +_(MAC_MOVE, "L2 mac moves") \ +_(MAC_MOVE_VIOLATE, "L2 mac move violations") \ +_(LIMIT, "L2 not learned due to limit") \ +_(HIT, "L2 learn hits") \ +_(FILTER_DROP, "L2 filter mac drops") + +typedef enum { +#define _(sym,str) L2LEARN_ERROR_##sym, + foreach_l2learn_error +#undef _ + L2LEARN_N_ERROR, +} l2learn_error_t; + +static char * l2learn_error_strings[] = { +#define _(sym,string) string, + foreach_l2learn_error +#undef _ +}; + +typedef enum { + L2LEARN_NEXT_L2FWD, + L2LEARN_NEXT_DROP, + L2LEARN_N_NEXT, +} l2learn_next_t; + + +// Perform learning on one packet based on the mac table lookup result + +static_always_inline void +l2learn_process (vlib_node_runtime_t * node, + l2learn_main_t * msm, + u64 * counter_base, + vlib_buffer_t * b0, + u32 sw_if_index0, + l2fib_entry_key_t * key0, + l2fib_entry_key_t * cached_key, + u32 * bucket0, + l2fib_entry_result_t * result0, + u32 * next0) +{ + u32 feature_bitmap; + + // Set up the default next node (typically L2FWD) + + // Remove ourself from the feature bitmap + feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_LEARN; + + // Save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap; + + // Determine the next node + *next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap); + + // Check mac table lookup result + + if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0)) { + // The entry was in the table, and the sw_if_index matched, the normal case + + // TODO: for dataplane learning and aging, do this: + // if refresh=0 and not a static mac, set refresh=1 + counter_base[L2LEARN_ERROR_HIT] += 1; + + } else if (result0->raw == ~0) { + + // The entry was not in table, so add it + + counter_base[L2LEARN_ERROR_MISS] += 1; + + if (msm->global_learn_count == msm->global_learn_limit) { + // Global limit reached. Do not learn the mac but forward the packet. + // In the future, limits could also be per-interface or bridge-domain. + counter_base[L2LEARN_ERROR_LIMIT] += 1; + goto done; + + } else { + BVT(clib_bihash_kv) kv; + // It is ok to learn + + result0->raw = 0; // clear all fields + result0->fields.sw_if_index = sw_if_index0; + // TODO: set timestamp in entry to clock for dataplane aging + kv.key = key0->raw; + kv.value = result0->raw; + + BV(clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */); + + cached_key->raw = ~0; // invalidate the cache + msm->global_learn_count++; + } + + } else { + + // The entry was in the table, but with the wrong sw_if_index mapping (mac move) + counter_base[L2LEARN_ERROR_MAC_MOVE] += 1; + + if (result0->fields.static_mac) { + // Don't overwrite a static mac + // TODO: Check violation policy. 
For now drop the packet + b0->error = node->errors[L2LEARN_ERROR_MAC_MOVE_VIOLATE]; + *next0 = L2LEARN_NEXT_DROP; + } else { + // Update the entry + // TODO: may want to rate limit mac moves + // TODO: check global/bridge domain/interface learn limits + BVT(clib_bihash_kv) kv; + + result0->raw = 0; // clear all fields + result0->fields.sw_if_index = sw_if_index0; + + kv.key = key0->raw; + kv.value = result0->raw; + + cached_key->raw = ~0; // invalidate the cache + + BV(clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */); + } + } + + if (result0->fields.filter) { + // drop packet because lookup matched a filter mac entry + + if (*next0 != L2LEARN_NEXT_DROP) { + // if we're not already dropping the packet, do it now + b0->error = node->errors[L2LEARN_ERROR_FILTER_DROP]; + *next0 = L2LEARN_NEXT_DROP; + } + } + +done: + return; +} + + +static uword +l2learn_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2learn_next_t next_index; + l2learn_main_t * msm = &l2learn_main; + vlib_node_t *n = vlib_get_node (vm, l2learn_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + l2fib_entry_key_t cached_key; + l2fib_entry_result_t cached_result; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + // Clear the one-entry cache in case mac table was updated + cached_key.raw = ~0; + cached_result.raw = ~0; /* warning be gone */ + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t * h0, * h1; + l2fib_entry_key_t key0, key1; + l2fib_entry_result_t result0, result1; + u32 bucket0, bucket1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* Process 2 x pkts */ + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->bd_index = vnet_buffer(b1)->l2.bd_index; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + /* process 2 pkts */ + em->counters[node_counter_base_index + L2LEARN_ERROR_L2LEARN] += 2; + + l2fib_lookup_2 (msm->mac_table, &cached_key, &cached_result, + h0->src_address, + h1->src_address, + vnet_buffer(b0)->l2.bd_index, + vnet_buffer(b1)->l2.bd_index, + &key0, + &key1, + &bucket0, + &bucket1, + &result0, + &result1); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &cached_key, + &bucket0, &result0, &next0); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b1, sw_if_index1, &key1, &cached_key, + &bucket1, &result1, &next1); + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t * h0; + l2fib_entry_key_t key0; + l2fib_entry_result_t result0; + u32 bucket0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + /* process 1 pkt */ + em->counters[node_counter_base_index + L2LEARN_ERROR_L2LEARN] += 1; + + l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result, + h0->src_address, vnet_buffer(b0)->l2.bd_index, + &key0, + &bucket0, + &result0); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + 
b0, sw_if_index0, &key0, &cached_key, + &bucket0, &result0, &next0); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2learn_node,static) = { + .function = l2learn_node_fn, + .name = "l2-learn", + .vector_size = sizeof (u32), + .format_trace = format_l2learn_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2learn_error_strings), + .error_strings = l2learn_error_strings, + + .n_next_nodes = L2LEARN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2LEARN_NEXT_DROP] = "error-drop", + [L2LEARN_NEXT_L2FWD] = "l2-fwd", + }, +}; + + +clib_error_t *l2learn_init (vlib_main_t *vm) +{ + l2learn_main_t * mp = &l2learn_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2learn_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + /* init the hash table ptr */ + mp->mac_table = get_mac_table(); + + // Set the default number of dynamically learned macs to the number + // of buckets. + mp->global_learn_limit = L2FIB_NUM_BUCKETS * 16; + + return 0; +} + +VLIB_INIT_FUNCTION (l2learn_init); + + +// set subinterface learn enable/disable +// The CLI format is: +// set interface l2 learn <interface> [disable] +static clib_error_t * +int_learn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_LEARN, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (int_learn_cli, static) = { + .path = "set interface l2 learn", + .short_help = "set interface l2 learn <interface> [disable]", + .function = int_learn, +}; + + +static clib_error_t * +l2learn_config (vlib_main_t * vm, unformat_input_t * input) +{ + l2learn_main_t *mp = &l2learn_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "limit %d", &mp->global_learn_limit)) + ; + + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +VLIB_CONFIG_FUNCTION (l2learn_config, "l2learn"); + diff --git a/vnet/vnet/l2/l2_learn.h b/vnet/vnet/l2/l2_learn.h new file mode 100644 index 00000000000..25674858fc9 --- /dev/null +++ b/vnet/vnet/l2/l2_learn.h @@ -0,0 +1,47 @@ +/* + * l2_learn.c : layer 2 learning using l2fib + * + * Copyright (c) 2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_l2learn_h +#define included_l2learn_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + + +typedef struct { + + // Hash table + BVT(clib_bihash) *mac_table; + + // number of dynamically learned mac entries + u32 global_learn_count; + + // maximum number of dynamically learned mac entries + u32 global_learn_limit; + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2learn_main_t; + + +l2learn_main_t l2learn_main; + +#endif diff --git a/vnet/vnet/l2/l2_output.c b/vnet/vnet/l2/l2_output.c new file mode 100644 index 00000000000..72c3d0374e3 --- /dev/null +++ b/vnet/vnet/l2/l2_output.c @@ -0,0 +1,541 @@ +/* + * l2_output.c : layer 2 output packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_output.h> + + +// Feature graph node names +static char * l2output_feat_names[] = { +#define _(sym,name) name, + foreach_l2output_feat +#undef _ +}; + +char **l2output_get_feat_names(void) { + return l2output_feat_names; +} + +l2output_main_t l2output_main; + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; +} l2output_trace_t; + +/* packet trace format function */ +static u8 * format_l2output_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2output_trace_t * t = va_arg (*args, l2output_trace_t *); + + s = format (s, "l2-output: sw_if_index %d dst %U src %U", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src); + return s; +} + + +#define foreach_l2output_error \ +_(L2OUTPUT, "L2 output packets") \ +_(EFP_DROP, "L2 EFP filter pre-rewrite drops") \ +_(VTR_DROP, "L2 output tag rewrite drops") \ +_(SHG_DROP, "L2 split horizon drops") \ +_(DROP, "L2 output drops") + +typedef enum { +#define _(sym,str) L2OUTPUT_ERROR_##sym, + foreach_l2output_error +#undef _ + L2OUTPUT_N_ERROR, +} l2output_error_t; + +static char * l2output_error_strings[] = { +#define _(sym,string) string, + foreach_l2output_error +#undef _ +}; + +typedef enum { + L2OUTPUT_NEXT_DROP, + L2OUTPUT_N_NEXT, +} l2output_next_t; + +// Return 0 if split horizon check passes, otherwise return non-zero +// Packets should not be transmitted out an interface with the same +// split-horizon group as the input interface, except if the shg is 0 +// in which case the check always passes. 
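// For example (illustrative values): split_horizon_violation (0, 5) and
// split_horizon_violation (5, 6) return 0 (transmit allowed), while
// split_horizon_violation (5, 5) returns 1 (drop).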
+static_always_inline u32 +split_horizon_violation (u8 shg1, u8 shg2) +{ + if (PREDICT_TRUE (shg1 == 0)) { + return 0; + } else { + return shg1 == shg2; + } +} + + +static uword +l2output_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2output_next_t next_index; + l2output_main_t * msm = &l2output_main; + vlib_node_t *n = vlib_get_node (vm, l2output_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + u32 cached_sw_if_index; + u32 cached_next_index; + + /* Invalidate cache */ + cached_sw_if_index = ~0; + cached_next_index = ~0; /* warning be gone */ + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t * h0, * h1; + l2_output_config_t * config0, * config1; + u32 feature_bitmap0, feature_bitmap1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3, * p4 , * p5; + u32 sw_if_index2, sw_if_index3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + // Note: no need to prefetch packet data. This node doesn't reference it. 
+ + // Prefetch the input config for the N+1 loop iteration + // This depends on the buffer header above + sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_TX]; + sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_TX]; + CLIB_PREFETCH (&msm->configs[sw_if_index2], CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&msm->configs[sw_if_index3], CLIB_CACHE_LINE_BYTES, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* TX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2output_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2output_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + em->counters[node_counter_base_index + L2OUTPUT_ERROR_L2OUTPUT] += 2; + + // Get config for the output interface + config0 = vec_elt_at_index(msm->configs, sw_if_index0); + config1 = vec_elt_at_index(msm->configs, sw_if_index1); + + // Get features from the config + // TODO: mask out any non-applicable features + feature_bitmap0 = config0->feature_bitmap; + feature_bitmap1 = config1->feature_bitmap; + + // Determine next node + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2output_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b0, + sw_if_index0, + feature_bitmap0, + &next0); + + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2output_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b1, + sw_if_index1, + feature_bitmap1, + &next1); + + // Perform output vlan tag rewrite and the pre-vtr EFP filter check. + // The EFP Filter only needs to be run if there is an output VTR + // configured. The flag for the post-vtr EFP Filter node is used + // to trigger the pre-vtr check as well. 
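      // (Illustrative: push_and_pop_bytes is non-zero whenever an output tag
      // rewrite, such as a 1:1 VLAN translation, is configured on the TX
      // subinterface, so both the pre-vtr EFP filter check and the rewrite run
      // in that case; with no output VTR configured, both are skipped.)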
+ + if (PREDICT_FALSE (config0->output_vtr.push_and_pop_bytes)) { + // Perform pre-vtr EFP filter check if configured + u32 failed1 = (feature_bitmap0 & L2OUTPUT_FEAT_EFP_FILTER) && + (l2_efp_filter_process(b0, &(config0->input_vtr))); + u32 failed2 = l2_vtr_process(b0, &(config0->output_vtr)); + + if (PREDICT_FALSE (failed1 | failed2)) { + next0 = L2OUTPUT_NEXT_DROP; + if (failed2) { + b0->error = node->errors[L2OUTPUT_ERROR_VTR_DROP]; + } + if (failed1) { + b0->error = node->errors[L2OUTPUT_ERROR_EFP_DROP]; + } + } + } + + if (PREDICT_FALSE (config1->output_vtr.push_and_pop_bytes)) { + // Perform pre-vtr EFP filter check if configured + u32 failed1 = (feature_bitmap1 & L2OUTPUT_FEAT_EFP_FILTER) && + (l2_efp_filter_process(b1, &(config1->input_vtr))); + u32 failed2 = l2_vtr_process(b1, &(config1->output_vtr)); + + if (PREDICT_FALSE (failed1 | failed2)) { + next1 = L2OUTPUT_NEXT_DROP; + if (failed2) { + b1->error = node->errors[L2OUTPUT_ERROR_VTR_DROP]; + } + if (failed1) { + b1->error = node->errors[L2OUTPUT_ERROR_EFP_DROP]; + } + } + } + + // Perform the split horizon check + // The check can only fail for non-zero shg's + if (PREDICT_FALSE (config0->shg + config1->shg)) { + // one of the checks might fail, check both + if (split_horizon_violation (config0->shg, vnet_buffer(b0)->l2.shg)) { + next0 = L2OUTPUT_NEXT_DROP; + b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP]; + } + if (split_horizon_violation (config1->shg, vnet_buffer(b1)->l2.shg)) { + next1 = L2OUTPUT_NEXT_DROP; + b1->error = node->errors[L2OUTPUT_ERROR_SHG_DROP]; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t * h0; + l2_output_config_t *config0; + u32 feature_bitmap0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + l2output_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + h0 = vlib_buffer_get_current (b0); + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + em->counters[node_counter_base_index + L2OUTPUT_ERROR_L2OUTPUT] += 1; + + // Get config for the output interface + config0 = vec_elt_at_index(msm->configs, sw_if_index0); + + // Get features from the config + // TODO: mask out any non-applicable features + feature_bitmap0 = config0->feature_bitmap; + + // Determine next node + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2output_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b0, + sw_if_index0, + feature_bitmap0, + &next0); + + // Perform output vlan tag rewrite and the pre-vtr EFP filter check. + // The EFP Filter only needs to be run if there is an output VTR + // configured. The flag for the post-vtr EFP Filter node is used + // to trigger the pre-vtr check as well. 
+ + if (config0->output_vtr.push_and_pop_bytes) { + // Perform pre-vtr EFP filter check if configured + u32 failed1 = (feature_bitmap0 & L2OUTPUT_FEAT_EFP_FILTER) && + (l2_efp_filter_process(b0, &(config0->input_vtr))); + u32 failed2 = l2_vtr_process(b0, &(config0->output_vtr)); + + if (PREDICT_FALSE (failed1 | failed2)) { + next0 = L2OUTPUT_NEXT_DROP; + if (failed2) { + b0->error = node->errors[L2OUTPUT_ERROR_VTR_DROP]; + } + if (failed1) { + b0->error = node->errors[L2OUTPUT_ERROR_EFP_DROP]; + } + } + } + + // Perform the split horizon check + if (PREDICT_FALSE (split_horizon_violation (config0->shg, vnet_buffer(b0)->l2.shg))) { + next0 = L2OUTPUT_NEXT_DROP; + b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP]; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2output_node) = { + .function = l2output_node_fn, + .name = "l2-output", + .vector_size = sizeof (u32), + .format_trace = format_l2output_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2output_error_strings), + .error_strings = l2output_error_strings, + + .n_next_nodes = L2OUTPUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2OUTPUT_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2output_init (vlib_main_t *vm) +{ + l2output_main_t * mp = &l2output_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Create the config vector + vec_validate(mp->configs, 100); + // Until we hook up the CLI config, just create 100 sw interface entries and zero them + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2output_node.index, + L2OUTPUT_N_FEAT, + l2output_get_feat_names(), + mp->next_nodes.feat_next_node_index); + + // Initialize the output node mapping table + l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec); + + return 0; +} + +VLIB_INIT_FUNCTION (l2output_init); + +typedef struct { + u32 node_index; + u32 sw_if_index; +} output_node_mapping_rpc_args_t; + +#if DPDK > 0 +static void output_node_rpc_callback +( output_node_mapping_rpc_args_t * a); + +static void output_node_mapping_send_rpc +(u32 node_index, + u32 sw_if_index) +{ + output_node_mapping_rpc_args_t args; + + args.node_index = node_index; + args.sw_if_index = sw_if_index; + + vl_api_rpc_call_main_thread (output_node_rpc_callback, + (u8 *) &args, sizeof (args)); +} +#endif + + +// Create a mapping in the next node mapping table for the given sw_if_index +u32 l2output_create_output_node_mapping ( + vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 node_index, // index of current node + u32 * output_node_index_vec, + u32 sw_if_index) { + + u32 next; // index of next graph node + vnet_hw_interface_t *hw0; + u32 *node; +#if DPDK > 0 + uword cpu_number; + + cpu_number = os_get_cpu_number(); + + if (cpu_number) + { + output_node_mapping_send_rpc (node_index, sw_if_index); + return 0; + } +#endif + + hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + + // dynamically create graph node arc + next = vlib_node_add_next (vlib_main, + node_index, + hw0->output_node_index); + + // Initialize vector with the mapping + + node = vec_elt_at_index(output_node_index_vec, sw_if_index); + *node = next; + + return next; +} + +#if DPDK > 0 +void output_node_rpc_callback (output_node_mapping_rpc_args_t *a) +{ + 
vlib_main_t * vm = vlib_get_main(); + vnet_main_t * vnm = vnet_get_main(); + l2output_main_t * mp = &l2output_main; + + (void) l2output_create_output_node_mapping + (vm, vnm, a->node_index, mp->next_nodes.output_node_index_vec, + a->sw_if_index); +} +#endif + +// Get a pointer to the config for the given interface +l2_output_config_t * l2output_intf_config (u32 sw_if_index) +{ + l2output_main_t * mp = &l2output_main; + + vec_validate(mp->configs, sw_if_index); + return vec_elt_at_index(mp->configs, sw_if_index); +} + +// Enable (or disable) the feature in the bitmap for the given interface +void l2output_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, + u32 enable) +{ + l2output_main_t * mp = &l2output_main; + l2_output_config_t *config; + + vec_validate(mp->configs, sw_if_index); + config = vec_elt_at_index(mp->configs, sw_if_index); + + if (enable) { + config->feature_bitmap |= feature_bitmap; + } else { + config->feature_bitmap &= ~feature_bitmap; + } +} diff --git a/vnet/vnet/l2/l2_output.h b/vnet/vnet/l2/l2_output.h new file mode 100644 index 00000000000..0d171b82541 --- /dev/null +++ b/vnet/vnet/l2/l2_output.h @@ -0,0 +1,219 @@ +/* + * l2_output.h : layer 2 output packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_output_h +#define included_vnet_l2_output_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> + + +// The L2 output feature configuration, a per-interface struct +typedef struct { + + u32 feature_bitmap; + + // vlan tag rewrite for ingress and egress + // ingress vtr is located here because the same config data is used for + // the egress EFP filter check + vtr_config_t input_vtr; + vtr_config_t output_vtr; + + // some of these flags may get integrated into the feature bitmap + u8 fwd_enable; + u8 flood_enable; + + // split horizon group + u8 shg; + +} l2_output_config_t; + + +// The set of next nodes for features and interface output. +// Each output feature node should include this. +typedef struct { + // vector of output next node index, indexed by sw_if_index. + // used when all output features have been executed and the + // next nodes are the interface output nodes. + u32 * output_node_index_vec; + + // array of next node index for each output feature, indexed + // by l2output_feat_t. Used to determine next feature node. 
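+  // (Filled in by feat_bitmap_init_next_nodes() at init time and consumed
+  // by feat_bitmap_get_next_node_index() from l2_output_dispatch().)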
+ u32 feat_next_node_index[32]; + +} l2_output_next_nodes_st; + + +typedef struct { + // Next nodes for features and output interfaces + l2_output_next_nodes_st next_nodes; + + /* config vector indexed by sw_if_index */ + l2_output_config_t *configs; + + /* Convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2output_main_t; + +l2output_main_t l2output_main; +vlib_node_registration_t l2output_node; + +// L2 output features + +// Mappings from feature ID to graph node name +#define foreach_l2output_feat \ + _(SPAN, "feature-bitmap-drop") \ + _(CFM, "feature-bitmap-drop") \ + _(QOS, "feature-bitmap-drop") \ + _(ACL, "l2-output-acl") \ + _(L2PT, "feature-bitmap-drop") \ + _(EFP_FILTER, "l2-efp-filter") \ + _(IPIW, "feature-bitmap-drop") \ + _(STP_BLOCKED, "feature-bitmap-drop") \ + _(LINESTATUS_DOWN, "feature-bitmap-drop") \ + _(XCRW, "l2-xcrw") + +// Feature bitmap positions +typedef enum { +#define _(sym,str) L2OUTPUT_FEAT_##sym##_BIT, + foreach_l2output_feat +#undef _ + L2OUTPUT_N_FEAT, +} l2output_feat_t; + +// Feature bit masks +typedef enum { +#define _(sym,str) L2OUTPUT_FEAT_##sym = (1<<L2OUTPUT_FEAT_##sym##_BIT), + foreach_l2output_feat +#undef _ +} l2output_feat_masks_t; + +// Return an array of strings containing graph node names of each feature +char **l2output_get_feat_names(void); + + +// The next set of functions is for use by output feature graph nodes. +// When the last bit has been cleared from the output feature bitmap, +// the next node is the output graph node for the TX sw_if_index. +// These functions help the feature nodes get that node index. + +// Create a mapping to the output graph node for the given sw_if_index +u32 l2output_create_output_node_mapping ( + vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 node_index, // index of current node + u32 * output_node_index_vec, + u32 sw_if_index); + +// Initialize the next node mapping table +always_inline +void l2output_init_output_node_vec (u32 **output_node_index_vec) { + + // Size it at 100 sw_if_indexes initially + // Uninitialized mappings are set to ~0 + vec_validate_init_empty(*output_node_index_vec, 100, ~0); +} + + +// Get a mapping from the output node mapping table, +// creating the entry if necessary. 
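+// The first packet sent out a given sw_if_index finds ~0 here, so the graph
+// arc to that interface's output node is added on demand and then cached in
+// output_node_index_vec for all later packets.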
+always_inline +u32 l2output_get_output_node (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 node_index, // index of current node + u32 sw_if_index, + u32 ** output_node_index_vec) // may be updated +{ + u32 next; // index of next graph node + + // Insure the vector is big enough + vec_validate_init_empty(*output_node_index_vec, sw_if_index, ~0); + + // Get the mapping for the sw_if_index + next = vec_elt(*output_node_index_vec, sw_if_index); + + if (next == ~0) { + // Mapping doesn't exist so create it + next = l2output_create_output_node_mapping (vlib_main, + vnet_main, + node_index, + *output_node_index_vec, + sw_if_index); + } + + return next; +} + + +// Determine the next L2 node based on the output feature bitmap +always_inline void +l2_output_dispatch (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + vlib_node_runtime_t * node, + u32 node_index, + u32 * cached_sw_if_index, + u32 * cached_next_index, + l2_output_next_nodes_st *next_nodes, + vlib_buffer_t * b0, + u32 sw_if_index, + u32 feature_bitmap, + u32 *next0) +{ + if (feature_bitmap) { + // There are some features to execute + + // Save bitmap for the next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap; + + // Determine the next node + *next0 = feat_bitmap_get_next_node_index(next_nodes->feat_next_node_index, + feature_bitmap); + } else { + // There are no features. Send packet to TX node for sw_if_index0 + // This is a little tricky in that the output interface next node indexes + // are not precomputed at init time. + + if (sw_if_index == *cached_sw_if_index) { + // We hit in the one-entry cache. Use it. + *next0 = *cached_next_index; + } else { + // Look up the output TX node + *next0 = l2output_get_output_node(vlib_main, + vnet_main, + node_index, + sw_if_index, + &next_nodes->output_node_index_vec); + + // Update the one-entry cache + *cached_sw_if_index = sw_if_index; + *cached_next_index = *next0; + } + } +} + +// Get a pointer to the config for the given interface +l2_output_config_t * l2output_intf_config (u32 sw_if_index); + +// Enable (or disable) the feature in the bitmap for the given interface +void l2output_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, + u32 enable); + +#endif diff --git a/vnet/vnet/l2/l2_output_acl.c b/vnet/vnet/l2/l2_output_acl.c new file mode 100644 index 00000000000..2f6c1dce41f --- /dev/null +++ b/vnet/vnet/l2/l2_output_acl.c @@ -0,0 +1,335 @@ +/* + * l2_output_acl.c : layer 2 output acl processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_output.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + + +typedef struct { + // Next nodes for features and output interfaces + l2_output_next_nodes_st next_nodes; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_outacl_main_t; + + + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 next_index; + u32 sw_if_index; +} l2_outacl_trace_t; + +/* packet trace format function */ +static u8 * format_l2_outacl_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_outacl_trace_t * t = va_arg (*args, l2_outacl_trace_t *); + + s = format (s, "l2-output-acl: sw_if_index %d dst %U src %U", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src); + return s; +} + +l2_outacl_main_t l2_outacl_main; + +static vlib_node_registration_t l2_outacl_node; + +#define foreach_l2_outacl_error \ +_(L2_OUTACL, "L2 output ACL packets") \ +_(DROP, "L2 output drops") + +typedef enum { +#define _(sym,str) L2_OUTACL_ERROR_##sym, + foreach_l2_outacl_error +#undef _ + L2_OUTACL_N_ERROR, +} l2_outacl_error_t; + +static char * l2_outacl_error_strings[] = { +#define _(sym,string) string, + foreach_l2_outacl_error +#undef _ +}; + +typedef enum { + L2_OUTACL_NEXT_DROP, + L2_OUTACL_N_NEXT, +} l2_outacl_next_t; + + + +static uword +l2_outacl_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_outacl_next_t next_index; + l2_outacl_main_t * msm = &l2_outacl_main; + vlib_node_t *n = vlib_get_node (vm, l2_outacl_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + u32 cached_sw_if_index = (u32)~0; + u32 cached_next_index = (u32)~0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t * h0, * h1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* TX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_outacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2_outacl_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] += 2; + + /* add core loop code here */ + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t * h0; + u32 feature_bitmap0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + l2_outacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] += 1; + + // L2_OUTACL code + // Dummy for now, just go to next feature node + + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_ACL; + + // Determine next node + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2_outacl_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b0, + sw_if_index0, + feature_bitmap0, + &next0); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2_outacl_node,static) = { + .function = l2_outacl_node_fn, + .name = "l2-output-acl", + .vector_size = sizeof (u32), + .format_trace = format_l2_outacl_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_outacl_error_strings), + 
.error_strings = l2_outacl_error_strings, + + .n_next_nodes = L2_OUTACL_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_OUTACL_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2_outacl_init (vlib_main_t *vm) +{ + l2_outacl_main_t * mp = &l2_outacl_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_outacl_node.index, + L2OUTPUT_N_FEAT, + l2output_get_feat_names(), + mp->next_nodes.feat_next_node_index); + + // Initialize the output node mapping table + l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_outacl_init); + +// set subinterface outacl enable/disable +// The CLI format is: +// set interface acl output <interface> [disable] +static clib_error_t * +int_l2_outacl (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + l2output_intf_bitmap_enable(sw_if_index, L2OUTPUT_FEAT_ACL, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_outacl_cli, static) = { + .path = "set interface acl output", + .short_help = "set interface acl output <interface> [disable]", + .function = int_l2_outacl, +}; diff --git a/vnet/vnet/l2/l2_patch.c b/vnet/vnet/l2/l2_patch.c new file mode 100644 index 00000000000..63be409d3b8 --- /dev/null +++ b/vnet/vnet/l2/l2_patch.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/error.h> + +typedef struct { + u32 cached_next_index; + u32 cached_rx_sw_if_index; + + /* vector of dispositions, indexed by rx_sw_if_index */ + u32 *tx_next_by_rx_sw_if_index; + u32 *tx_sw_if_index_by_rx_sw_if_index; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_patch_main_t; + +typedef struct { + u32 rx_sw_if_index; + u32 tx_sw_if_index; +} l2_patch_trace_t; + +/* packet trace format function */ +static u8 * format_l2_patch_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_patch_trace_t * t = va_arg (*args, l2_patch_trace_t *); + + s = format (s, "L2_PATCH: rx %d tx %d", t->rx_sw_if_index, + t->tx_sw_if_index); + return s; +} + +l2_patch_main_t l2_patch_main; + +static vlib_node_registration_t l2_patch_node; + +#define foreach_l2_patch_error \ +_(PATCHED, "L2 patch packets") \ +_(DROPPED, "L2 patch misconfigured drops") + +typedef enum { +#define _(sym,str) L2_PATCH_ERROR_##sym, + foreach_l2_patch_error +#undef _ + L2_PATCH_N_ERROR, +} l2_patch_error_t; + +static char * l2_patch_error_strings[] = { +#define _(sym,string) string, + foreach_l2_patch_error +#undef _ +}; + +typedef enum { + L2_PATCH_NEXT_DROP, + L2_PATCH_N_NEXT, +} l2_patch_next_t; + +static uword +l2_patch_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_patch_next_t next_index; + l2_patch_main_t * l2pm = &l2_patch_main; + vlib_node_t *n = vlib_get_node (vm, l2_patch_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* So stupid / simple, we don't need to prefetch data */ + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index1] != ~0); + ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1] != ~0); + + if (PREDICT_TRUE (sw_if_index0 == l2pm->cached_rx_sw_if_index)) + next0 = l2pm->cached_next_index; + else + { + next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0]; + l2pm->cached_rx_sw_if_index = sw_if_index0; + l2pm->cached_next_index = next0; + } + + if (PREDICT_TRUE (sw_if_index1 == l2pm->cached_rx_sw_if_index)) + next1 = l2pm->cached_next_index; + else + next1 = l2pm->tx_next_by_rx_sw_if_index [sw_if_index1]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rx_sw_if_index = sw_if_index0; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index0]; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->rx_sw_if_index = sw_if_index1; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index1]; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0); + + if (PREDICT_TRUE (sw_if_index0 == l2pm->cached_rx_sw_if_index)) + next0 = l2pm->cached_next_index; + else + { + next0 = l2pm->tx_next_by_rx_sw_if_index [sw_if_index0]; + l2pm->cached_rx_sw_if_index = sw_if_index0; + l2pm->cached_next_index = next0; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rx_sw_if_index = sw_if_index0; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index0]; + } + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + em->counters[node_counter_base_index + L2_PATCH_ERROR_PATCHED] += + frame->n_vectors; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2_patch_node, static) = { 
+ .function = l2_patch_node_fn, + .name = "l2_patch", + .vector_size = sizeof (u32), + .format_trace = format_l2_patch_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_patch_error_strings), + .error_strings = l2_patch_error_strings, + + .n_next_nodes = L2_PATCH_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_PATCH_NEXT_DROP] = "error-drop", + }, +}; + +int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add) +{ + l2_patch_main_t * l2pm = &l2_patch_main; + vnet_hw_interface_t * rxhi, *txhi; + u32 tx_next_index; + + /* + * We assume that the API msg handler has used 2x VALIDATE_SW_IF_INDEX + * macros... + */ + + rxhi = vnet_get_sup_hw_interface (l2pm->vnet_main, rx_sw_if_index); + + /* Make sure caller didn't pass a vlan subif, etc. */ + if (rxhi->sw_if_index != rx_sw_if_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + txhi = vnet_get_sup_hw_interface (l2pm->vnet_main, tx_sw_if_index); + if (txhi->sw_if_index != tx_sw_if_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX_2; + + if (is_add) + { + tx_next_index = vlib_node_add_next (l2pm->vlib_main, + l2_patch_node.index, + txhi->output_node_index); + + vec_validate_init_empty (l2pm->tx_next_by_rx_sw_if_index, + rx_sw_if_index, ~0); + + l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = tx_next_index; + vec_validate_init_empty (l2pm->tx_sw_if_index_by_rx_sw_if_index, + rx_sw_if_index, ~0); + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] + = txhi->sw_if_index; + + ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + + vnet_hw_interface_rx_redirect_to_node (l2pm->vnet_main, + rxhi->hw_if_index, + l2_patch_node.index); + } + else + { + ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index, + 0 /* disable promiscuous mode */); + + vnet_hw_interface_rx_redirect_to_node (l2pm->vnet_main, + rxhi->hw_if_index, + ~0 /* disable */); + if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index) + { + l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0; + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] = ~0; + } + } + + return 0; +} + +static clib_error_t * +test_patch_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2_patch_main_t * l2pm = &l2_patch_main; + unformat_input_t _line_input, * line_input = &_line_input; + u32 rx_sw_if_index, tx_sw_if_index; + int rv; + int rx_set = 0; + int tx_set = 0; + int is_add = 1; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, + l2pm->vnet_main, &rx_sw_if_index)) + rx_set = 1; + else if (unformat (line_input, "tx %U", unformat_vnet_sw_interface, + l2pm->vnet_main, &tx_sw_if_index)) + tx_set = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else break; + } + + if (rx_set == 0) + return clib_error_return (0, "rx interface not set"); + + if (tx_set == 0) + return clib_error_return (0, "tx interface not set"); + + rv = vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, is_add); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "rx interface not a physical port"); + + case VNET_API_ERROR_INVALID_SW_IF_INDEX_2: + return clib_error_return (0, "tx interface not a physical port"); + + default: + return clib_error_return + (0, "WARNING: vnet_l2_patch_add_del returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (test_patch_command, static) = { + .path = "test l2patch", + .short_help = + "rx <intfc> tx <intfc> [del]", + .function = test_patch_command_fn, +}; + +// Display the contents of the l2patch table. +static clib_error_t * +show_l2patch (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2_patch_main_t * l2pm = &l2_patch_main; + u32 rx_sw_if_index; + u32 no_entries = 1; + + ASSERT(vec_len(l2pm->tx_next_by_rx_sw_if_index) == + vec_len(l2pm->tx_sw_if_index_by_rx_sw_if_index)); + + for (rx_sw_if_index = 0; + rx_sw_if_index < vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index); + rx_sw_if_index++) + { + u32 tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index]; + if (tx_sw_if_index != ~0) + { + no_entries = 0; + vlib_cli_output (vm, "%26U -> %U", + format_vnet_sw_if_index_name, + l2pm->vnet_main, rx_sw_if_index, + format_vnet_sw_if_index_name, + l2pm->vnet_main,tx_sw_if_index); + } + } + + if (no_entries) + vlib_cli_output (vm, "no l2patch entries"); + + return 0; +} + +VLIB_CLI_COMMAND (show_l2patch_cli, static) = { + .path = "show l2patch", + .short_help = "Show l2 interface cross-connect entries", + .function = show_l2patch, +}; + +clib_error_t *l2_patch_init (vlib_main_t *vm) +{ + l2_patch_main_t * mp = &l2_patch_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_patch_init); diff --git a/vnet/vnet/l2/l2_vtr.c b/vnet/vnet/l2/l2_vtr.c new file mode 100644 index 00000000000..a7499041009 --- /dev/null +++ b/vnet/vnet/l2/l2_vtr.c @@ -0,0 +1,448 @@ +/* + * l2_vtr.c : layer 2 vlan tag rewrite configuration + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_output.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/l2/l2_input_vtr.h> +#include <vnet/l2/l2_output.h> + +#include <vppinfra/error.h> +#include <vlib/cli.h> + + +// Just a placeholder. Also insures file is not eliminated by linker. +clib_error_t *l2_vtr_init (vlib_main_t *vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION(l2_vtr_init); + + +// Configure vtag tag rewrite on the given interface. +// Return 1 if there is an error, 0 if ok +u32 l2vtr_configure (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 vtr_op, + u32 push_dot1q, // ethertype of first pushed tag is dot1q/dot1ad + u32 vtr_tag1, // first pushed tag + u32 vtr_tag2) // second pushed tag +{ + vnet_hw_interface_t * hi; + vnet_sw_interface_t * si; + u32 hw_no_tags; + u32 error = 0; + vtr_config_t * in_config; + vtr_config_t * out_config; + u32 enable; + u32 push_inner_et; + u32 push_outer_et; + u32 cfg_tags; + + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) { + error = VNET_API_ERROR_INVALID_INTERFACE; // non-ethernet interface + goto done; + } + + // Init the config for this interface + vec_validate (l2output_main.configs, sw_if_index); + in_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->input_vtr); + out_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->output_vtr); + in_config->raw_tags = 0; + out_config->raw_tags = 0; + + // Get the configured tags for the interface + si = vnet_get_sw_interface (vnet_main, sw_if_index); + hw_no_tags = (si->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + + // Construct the input tag-rewrite config + + push_outer_et = clib_net_to_host_u16 (push_dot1q ? 
ETHERNET_TYPE_VLAN : ETHERNET_TYPE_DOT1AD); + push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN); + vtr_tag1 = clib_net_to_host_u16 (vtr_tag1); + vtr_tag2 = clib_net_to_host_u16 (vtr_tag2); + + // Determine number of vlan tags with explictly configured values + cfg_tags = 0; + if (hw_no_tags || si->sub.eth.flags.no_tags) { + cfg_tags = 0; + } else if (si->sub.eth.flags.one_tag) { + cfg_tags = 1; + if (si->sub.eth.flags.outer_vlan_id_any) { + cfg_tags = 0; + } + } else if (si->sub.eth.flags.two_tags) { + cfg_tags = 2; + if (si->sub.eth.flags.inner_vlan_id_any) { + cfg_tags = 1; + } + if (si->sub.eth.flags.outer_vlan_id_any) { + cfg_tags = 0; + } + } + + switch (vtr_op) { + case L2_VTR_DISABLED: + in_config->push_and_pop_bytes = 0; + break; + + case L2_VTR_POP_1: + if (cfg_tags < 1) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags + goto done; + } + in_config->pop_bytes = 4; + in_config->push_bytes = 0; + break; + + case L2_VTR_POP_2: + if (cfg_tags < 2) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags + goto done; + } + in_config->pop_bytes = 8; + in_config->push_bytes = 0; + + out_config->push_bytes = in_config->pop_bytes; + out_config->pop_bytes = in_config->push_bytes; + break; + + case L2_VTR_PUSH_1: + in_config->pop_bytes = 0; + in_config->push_bytes = 4; + in_config->tags[1].priority_cfi_and_id = vtr_tag1; + in_config->tags[1].type = push_outer_et; + break; + + case L2_VTR_PUSH_2: + in_config->pop_bytes = 0; + in_config->push_bytes = 8; + in_config->tags[0].priority_cfi_and_id = vtr_tag1; + in_config->tags[0].type = push_outer_et; + in_config->tags[1].priority_cfi_and_id = vtr_tag2; + in_config->tags[1].type = push_inner_et; + break; + + case L2_VTR_TRANSLATE_1_1: + if (cfg_tags < 1) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags + goto done; + } + in_config->pop_bytes = 4; + in_config->push_bytes = 4; + in_config->tags[1].priority_cfi_and_id = vtr_tag1; + in_config->tags[1].type = push_outer_et; + break; + + case L2_VTR_TRANSLATE_1_2: + if (cfg_tags < 1) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags + goto done; + } + in_config->pop_bytes = 4; + in_config->push_bytes = 8; + in_config->tags[0].priority_cfi_and_id = vtr_tag1; + in_config->tags[0].type = push_outer_et; + in_config->tags[1].priority_cfi_and_id = vtr_tag2; + in_config->tags[1].type = push_inner_et; + break; + + case L2_VTR_TRANSLATE_2_1: + if (cfg_tags < 2) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags + goto done; + } + in_config->pop_bytes = 8; + in_config->push_bytes = 4; + in_config->tags[1].priority_cfi_and_id = vtr_tag1; + in_config->tags[1].type = push_outer_et; + break; + + case L2_VTR_TRANSLATE_2_2: + if (cfg_tags < 2) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags + goto done; + } + in_config->pop_bytes = 8; + in_config->push_bytes = 8; + in_config->tags[0].priority_cfi_and_id = vtr_tag1; + in_config->tags[0].type = push_outer_et; + in_config->tags[1].priority_cfi_and_id = vtr_tag2; + in_config->tags[1].type = push_inner_et; + break; + } + + // Construct the output tag-rewrite config + + // The push/pop values are always reversed + out_config->push_bytes = in_config->pop_bytes; + out_config->pop_bytes = in_config->push_bytes; + + // Any pushed tags are derived from the subinterface config + push_outer_et = clib_net_to_host_u16 (si->sub.eth.flags.dot1ad ? 
ETHERNET_TYPE_DOT1AD : ETHERNET_TYPE_VLAN); + push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN); + vtr_tag1 = clib_net_to_host_u16 (si->sub.eth.outer_vlan_id); + vtr_tag2 = clib_net_to_host_u16 (si->sub.eth.inner_vlan_id); + + if (out_config->push_bytes == 4) { + out_config->tags[1].priority_cfi_and_id = vtr_tag1; + out_config->tags[1].type = push_outer_et; + } else if (out_config->push_bytes == 8) { + out_config->tags[0].priority_cfi_and_id = vtr_tag1; + out_config->tags[0].type = push_outer_et; + out_config->tags[1].priority_cfi_and_id = vtr_tag2; + out_config->tags[1].type = push_inner_et; + } + + // set the interface enable flags + enable = (vtr_op != L2_VTR_DISABLED); + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable); + // output vtr enable is checked explicitly in l2_output + + done: + return error; +} + +// Get vtag tag rewrite on the given interface. +// Return 1 if there is an error, 0 if ok +u32 l2vtr_get (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 *vtr_op, + u32 *push_dot1q, // ethertype of first pushed tag is dot1q/dot1ad + u32 *vtr_tag1, // first pushed tag + u32 *vtr_tag2) // second pushed tag +{ + vnet_hw_interface_t * hi; + u32 error = 0; + vtr_config_t * in_config; + + if (!vtr_op || !push_dot1q || !vtr_tag1 || !vtr_tag2) { + clib_warning ("invalid arguments"); + error = VNET_API_ERROR_INVALID_ARGUMENT; + goto done; + } + + *vtr_op = L2_VTR_DISABLED; + *vtr_tag1 = 0; + *vtr_tag2 = 0; + *push_dot1q = 0; + + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) { + // non-ethernet interface + goto done; + } + + if (sw_if_index >= vec_len(l2output_main.configs)) { + // no specific config (return disabled) + goto done; + } + + // Get the config for this interface + in_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->input_vtr); + + // DISABLED + if (in_config->push_and_pop_bytes == 0) { + goto done; + } + + // find out vtr_op + switch (in_config->pop_bytes) { + case 0: + switch (in_config->push_bytes) { + case 0: + // DISABLED + goto done; + case 4: + *vtr_op = L2_VTR_PUSH_1; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_PUSH_2; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + break; + + case 4: + switch (in_config->push_bytes) { + case 0: + *vtr_op = L2_VTR_POP_1; + break; + case 4: + *vtr_op = L2_VTR_TRANSLATE_1_1; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_TRANSLATE_1_2; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + 
break; + + case 8: + switch (in_config->push_bytes) { + case 0: + *vtr_op = L2_VTR_POP_2; + break; + case 4: + *vtr_op = L2_VTR_TRANSLATE_2_1; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_TRANSLATE_2_2; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + break; + + default: + clib_warning ("invalid pop_bytes count: %d", in_config->pop_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + + done: + return error; +} + +// set subinterface vtr enable/disable +// The CLI format is: +// set interface l2 tag-rewrite <interface> [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]] +// "push" can also be replaced by "translate-{1|2}-{1|2}" +static clib_error_t * +int_l2_vtr (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 vtr_op; + u32 push_dot1q = 0; + u32 tag1 = 0, tag2 = 0; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + vtr_op = L2_VTR_DISABLED; + + if (unformat (input, "disable")) { + vtr_op = L2_VTR_DISABLED; + } else if (unformat (input, "pop 1")) { + vtr_op = L2_VTR_POP_1; + } else if (unformat (input, "pop 2")) { + vtr_op = L2_VTR_POP_2; + + } else if (unformat (input, "push dot1q %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_PUSH_2; + push_dot1q = 1; + } else if (unformat (input, "push dot1ad %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_PUSH_2; + + } else if (unformat (input, "push dot1q %d", &tag1)) { + vtr_op = L2_VTR_PUSH_1; + push_dot1q = 1; + } else if (unformat (input, "push dot1ad %d", &tag1)) { + vtr_op = L2_VTR_PUSH_1; + + } else if (unformat (input, "translate 1-1 dot1q %d", &tag1)) { + vtr_op = L2_VTR_TRANSLATE_1_1; + push_dot1q = 1; + } else if (unformat (input, "translate 1-1 dot1ad %d", &tag1)) { + vtr_op = L2_VTR_TRANSLATE_1_1; + + } else if (unformat (input, "translate 2-1 dot1q %d", &tag1)) { + vtr_op = L2_VTR_TRANSLATE_2_1; + push_dot1q = 1; + } else if (unformat (input, "translate 2-1 dot1ad %d", &tag1)) { + vtr_op = L2_VTR_TRANSLATE_2_1; + + } else if (unformat (input, "translate 2-2 dot1q %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_TRANSLATE_2_2; + push_dot1q = 1; + } else if (unformat (input, "translate 2-2 dot1ad %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_TRANSLATE_2_2; + + } else if (unformat (input, "translate 1-2 dot1q %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_TRANSLATE_1_2; + push_dot1q = 1; + } else if (unformat (input, "translate 1-2 dot1ad %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_TRANSLATE_1_2; + + } else { + error = clib_error_return (0, "expecting [disable | pop 1 | pop 2 | push {dot1q|dot1ah} <tag> [<tag>]\n" + " | translate {1|2}-{1|2} {dot1q|dot1ah} <tag> [<tag>]] but got `%U'", + format_unformat_error, input); + goto done; + } + + if (l2vtr_configure (vm, + vnm, + sw_if_index, + vtr_op, + push_dot1q, + tag1, + tag2)) { + error = clib_error_return (0, "vlan tag 
rewrite is not compatible with interface"); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = { + .path = "set interface l2 tag-rewrite", + .short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]", + .function = int_l2_vtr, +}; + diff --git a/vnet/vnet/l2/l2_vtr.h b/vnet/vnet/l2/l2_vtr.h new file mode 100644 index 00000000000..aef6c6d255e --- /dev/null +++ b/vnet/vnet/l2/l2_vtr.h @@ -0,0 +1,167 @@ +/* + * l2_vtr.h : layer 2 vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_vtr_h +#define included_vnet_l2_vtr_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/l2_vtr.h> + +// VTR config options for API and CLI support +typedef enum { + L2_VTR_DISABLED, + L2_VTR_PUSH_1, + L2_VTR_PUSH_2, + L2_VTR_POP_1, + L2_VTR_POP_2, + L2_VTR_TRANSLATE_1_1, + L2_VTR_TRANSLATE_1_2, + L2_VTR_TRANSLATE_2_1, + L2_VTR_TRANSLATE_2_2 +} l2_vtr_op_t; + +// Per-interface vlan tag rewrite configuration +// There will be one instance of this struct for each sw_if_index +// for both input vtr and output vtr +typedef struct { + union { + // Up to two vlan tags to push. + // if there is only one vlan tag to push, it is in tags[1]. + ethernet_vlan_header_tv_t tags[2]; + u64 raw_tags; + }; + + union { + struct { + u8 push_bytes; // number of bytes to push for up to 2 vlans (0,4,8) + u8 pop_bytes; // number of bytes to pop for up to 2 vlans (0,4,8) + }; + u16 push_and_pop_bytes; // if 0 then the feature is disabled + }; +} vtr_config_t; + + +// Perform the configured tag rewrite on the packet. +// Return 0 if ok, 1 if packet should be dropped (e.g. tried to pop too many tags) +always_inline u32 +l2_vtr_process (vlib_buffer_t * b0, + vtr_config_t * config) +{ + u64 temp_8; + u32 temp_4; + u8 * eth; + + eth = vlib_buffer_get_current (b0); + + // copy the 12B dmac and smac to a temporary location + temp_8 = *((u64 *)eth); + temp_4 = *((u32 *)(eth+8)); + + // adjust for popped tags + eth += config->pop_bytes; + + // if not enough tags to pop then drop packet + if (PREDICT_FALSE ((vnet_buffer(b0)->l2.l2_len - 12) < config->pop_bytes)) { + return 1; + } + + // copy the 2 new tags to the start of the packet + *((u64 *)(eth + 12 - 8)) = config->raw_tags; + + // TODO: set cos bits + + // adjust for pushed tags: + eth -= config->push_bytes; + + // copy the 12 dmac and smac back to the packet + *((u64 *)eth) = temp_8; + *((u32 *)(eth+8)) = temp_4; + + // Update l2_len + vnet_buffer(b0)->l2.l2_len += (word)config->push_bytes - (word)config->pop_bytes; + + // Update packet len + vlib_buffer_advance(b0, (word)config->pop_bytes - (word)config->push_bytes); + + return 0; +} + + +// Perform the egress pre-vlan tag rewrite EFP Filter check. The post-vlan tag rewrite +// check is a separate graph node. 
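+// (That post-vtr check is the l2-efp-filter node, enabled through the
+// L2OUTPUT_FEAT_EFP_FILTER bit in the output feature bitmap.)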
+// +// This check insures that a packet being output to an interface (before output vtr +// is performed) has vlan tags that match those on a packet received from that +// interface (after vtr has been performed). +// This means verifying that any tags pushed by input vtr are present on the packet. +// +// Return 0 if ok, 1 if packet should be dropped. +// This function should be passed the input vtr config for the interface. +always_inline u8 +l2_efp_filter_process (vlib_buffer_t * b0, + vtr_config_t * in_config) +{ + u8 * eth; + u64 packet_tags; + u64 tag_mask; + + eth = vlib_buffer_get_current (b0); + + // If there are 2 tags pushed, they must match config->tags[0] and config->tags[1]. + // If there is one tag pushed, it must match config->tag[1]. + // If there are 0 tags pushed, the check passes. + + // mask for two vlan id and ethertypes, no cos bits + tag_mask = clib_net_to_host_u64(0xFFFF0FFFFFFF0FFF); + // mask for one vlan id and ethertype, no cos bits + tag_mask = (in_config->push_bytes == 4) ? clib_net_to_host_u64(0xFFFF0FFF) : tag_mask; + // mask for always match + tag_mask = (in_config->push_bytes == 0) ? 0 : tag_mask; + + // Read 8B from the packet, getting the proper set of vlan tags + // For 0 push bytes, the address doesn't matter since the mask clears the data to 0. + packet_tags = *((u64 *)(eth + 4 + in_config->push_bytes)); + + // Check if the packet tags match the configured tags + return (packet_tags & tag_mask) != in_config->raw_tags; +} + + +// Configure vtag tag rewrite on the given interface. +// Return 1 if there is an error, 0 if ok +u32 l2vtr_configure(vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 vtr_op, + u32 push_dot1q, + u32 vtr_tag1, + u32 vtr_tag2); + +// Get vtag tag rewrite on the given interface. +// Return 1 if there is an error, 0 if ok +u32 l2vtr_get (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 *vtr_op, + u32 *push_dot1q, + u32 *vtr_tag1, + u32 *vtr_tag2); + +#endif // included_vnet_l2_vtr_h + diff --git a/vnet/vnet/l2/l2_xcrw.c b/vnet/vnet/l2/l2_xcrw.c new file mode 100644 index 00000000000..f5fe3ca14e4 --- /dev/null +++ b/vnet/vnet/l2/l2_xcrw.c @@ -0,0 +1,559 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/l2/l2_xcrw.h> + +/* + * General L2 / L3 cross-connect, used to set up + * "L2 interface <--> your-favorite-tunnel-encap" tunnels. + * + * We set up a typical L2 cross-connect or (future) bridge + * to hook L2 interface(s) up to the L3 stack in arbitrary ways. + * + * Each l2_xcrw adjacency specifies 3 things: + * + * 1. The next graph node (presumably in the L3 stack) to + * process the (L2 -> L3) packet + * + * 2. A new value for vnet_buffer(b)->sw_if_index[VLIB_TX] + * (i.e. a lookup FIB index), + * + * 3. A rewrite string to apply. 
+ * + * Example: to cross-connect an L2 interface or (future) bridge + * to an mpls-o-gre tunnel, set up the L2 rewrite string as shown in + * mpls_gre_rewrite, and use "mpls-post-rewrite" to fix the + * GRE IP header checksum and length fields. + */ + +typedef struct { + u32 next_index; + u32 tx_fib_index; +} l2_xcrw_trace_t; + +/* packet trace format function */ +static u8 * format_l2_xcrw_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_xcrw_trace_t * t = va_arg (*args, l2_xcrw_trace_t *); + + s = format (s, "L2_XCRW: next index %d tx_fib_index %d", + t->next_index, t->tx_fib_index); + return s; +} + +l2_xcrw_main_t l2_xcrw_main; + +static vlib_node_registration_t l2_xcrw_node; + +static char * l2_xcrw_error_strings[] = { +#define _(sym,string) string, + foreach_l2_xcrw_error +#undef _ +}; + +static uword +l2_xcrw_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_xcrw_next_t next_index; + l2_xcrw_main_t * xcm = &l2_xcrw_main; + vlib_node_t *n = vlib_get_node (vm, l2_xcrw_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + l2_xcrw_adjacency_t * adj0, * adj1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0); + adj1 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index1); + + next0 = adj0->rewrite_header.next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + adj0->rewrite_header.sw_if_index; + + next1 = adj1->rewrite_header.next_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = + adj1->rewrite_header.sw_if_index; + + em->counters[node_counter_base_index + next1]++; + + if (PREDICT_TRUE(next0 > 0)) + { + u8 * h0 = vlib_buffer_get_current (b0); + vnet_rewrite_one_header (adj0[0], h0, + adj0->rewrite_header.data_bytes); + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + if (PREDICT_TRUE(next1 > 0)) + { + u8 * h1 = vlib_buffer_get_current (b1); + vnet_rewrite_one_header (adj1[0], h1, + adj1->rewrite_header.data_bytes); + vlib_buffer_advance (b1, -adj1->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if 
(PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->tx_fib_index = adj0->rewrite_header.sw_if_index; + } + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->next_index = next1; + t->tx_fib_index = adj1->rewrite_header.sw_if_index; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + l2_xcrw_adjacency_t * adj0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0); + + next0 = adj0->rewrite_header.next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + adj0->rewrite_header.sw_if_index; + + if (PREDICT_TRUE(next0 > 0)) + { + u8 *h0 = vlib_buffer_get_current (b0); + vnet_rewrite_one_header (adj0[0], h0, + adj0->rewrite_header.data_bytes); + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->tx_fib_index = adj0->rewrite_header.sw_if_index; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2_xcrw_node, static) = { + .function = l2_xcrw_node_fn, + .name = "l2-xcrw", + .vector_size = sizeof (u32), + .format_trace = format_l2_xcrw_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_xcrw_error_strings), + .error_strings = l2_xcrw_error_strings, + + .n_next_nodes = L2_XCRW_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_XCRW_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2_xcrw_init (vlib_main_t *vm) +{ + l2_xcrw_main_t * mp = &l2_xcrw_main; + + mp->vlib_main = vm; + mp->vnet_main = &vnet_main; + mp->tunnel_index_by_l2_sw_if_index = hash_create (0, sizeof(uword)); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_xcrw_init); + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +static u8 * format_xcrw_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "xcrw%d", dev_instance); +} + +VNET_DEVICE_CLASS (xcrw_device_class,static) = { + .name = "Xcrw", + .format_device_name = format_xcrw_name, + .tx_function = dummy_interface_tx, +}; + +/* Create a sham tunnel interface and return its sw_if_index */ +static u32 +create_xcrw_interface (vlib_main_t * vm) +{ + vnet_main_t * vnm = vnet_get_main(); + static u32 instance; + u8 address[6]; + u32 hw_if_index; 
+ vnet_hw_interface_t * hi; + u32 sw_if_index; + + /* mac address doesn't really matter */ + memset (address, 0, sizeof (address)); + address[2] = 0x12; + + /* can returns error iff phy != 0 */ + (void) ethernet_register_interface + (vnm, + xcrw_device_class.index, + instance++, + address, + &hw_if_index, + /* flag change */ 0); + + hi = vnet_get_hw_interface (vnm, hw_if_index); + sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + /* Output to the sham tunnel invokes the encap node */ + hi->output_node_index = l2_xcrw_node.index; + + return sw_if_index; +} + +int vnet_configure_l2_xcrw (vlib_main_t * vm, vnet_main_t *vnm, + u32 l2_sw_if_index, u32 tx_fib_index, + u8 * rewrite, u32 next_node_index, int is_add) +{ + l2_xcrw_main_t * xcm = &l2_xcrw_main; + l2_xcrw_adjacency_t * a; + l2_xcrw_tunnel_t * t; + uword * p; + + if (is_add) + { + + pool_get (xcm->tunnels, t); + + /* No interface allocated? Do it. Otherwise, set admin up */ + if (t->tunnel_sw_if_index == 0) + t->tunnel_sw_if_index = create_xcrw_interface (vm); + else + vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + t->l2_sw_if_index = l2_sw_if_index; + + vec_validate (xcm->adj_by_sw_if_index, t->l2_sw_if_index); + + a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index); + memset (a, 0, sizeof (*a)); + + a->rewrite_header.sw_if_index = tx_fib_index; + + /* + * Add or find a dynamic disposition for the successor node, + * e.g. so we can ship pkts to mpls_post_rewrite... + */ + a->rewrite_header.next_index = + vlib_node_add_next (vm, l2_xcrw_node.index, next_node_index); + + if (vec_len (rewrite)) + vnet_rewrite_set_data (a[0], rewrite, vec_len(rewrite)); + + set_int_l2_mode (vm, vnm, MODE_L2_XC, t->l2_sw_if_index, 0, 0, 0, + t->tunnel_sw_if_index); + hash_set (xcm->tunnel_index_by_l2_sw_if_index, + t->l2_sw_if_index, t - xcm->tunnels); + return 0; + } + else + { + p = hash_get (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index); + if (p == 0) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + t = pool_elt_at_index (xcm->tunnels, p[0]); + + a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index); + /* Reset adj to drop traffic */ + memset (a, 0, sizeof (*a)); + + set_int_l2_mode (vm, vnm, MODE_L3, t->l2_sw_if_index, 0, 0, 0, 0); + + vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index, 0 /* down */); + + hash_unset (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index); + pool_put (xcm->tunnels, t); + } + return 0; +} + + +static clib_error_t * +set_l2_xcrw_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + int is_add = 1; + int is_ipv6 = 0; /* for fib id -> fib index mapping */ + u32 tx_fib_id = ~0; + u32 tx_fib_index = ~0; + u32 next_node_index = ~0; + u32 l2_sw_if_index; + u8 * rw = 0; + vnet_main_t * vnm = vnet_get_main(); + int rv; + + + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (! 
unformat (line_input, "%U", + unformat_vnet_sw_interface, vnm, &l2_sw_if_index)) + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "next %U", + unformat_vlib_node, vm, &next_node_index)) + ; + else if (unformat (line_input, "tx-fib-id %d", &tx_fib_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "ipv6")) + is_ipv6 = 1; + else if (unformat (line_input, "rw %U", + unformat_hex_string, &rw)); + else + break; + } + + if (next_node_index == ~0) + return clib_error_return (0, "next node not specified"); + + if (tx_fib_id != ~0) + { + uword * p; + + if (is_ipv6) + p = hash_get (ip6_main.fib_index_by_table_id, tx_fib_id); + else + p = hash_get (ip4_main.fib_index_by_table_id, tx_fib_id); + + if (p == 0) + return clib_error_return (0, "nonexistent tx_fib_id %d", + tx_fib_id); + + tx_fib_index = p[0]; + } + + rv = vnet_configure_l2_xcrw (vm, vnm, l2_sw_if_index, tx_fib_index, + rw, next_node_index, is_add); + + switch (rv) + { + + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "%U not cross-connected", + format_vnet_sw_if_index_name, + vnm, l2_sw_if_index); + default: + return clib_error_return (0, "vnet_configure_l2_xcrw returned %d", + rv); + } + + vec_free (rw); + + return 0; +} + +VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = { + .path = "set interface l2 xcrw", + .short_help = + "set int l2 xcrw <interface> next <node-name>\n" + " [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>", + .function = set_l2_xcrw_command_fn, +}; + +static u8 * format_l2xcrw (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + l2_xcrw_tunnel_t * t = va_arg (*args, l2_xcrw_tunnel_t *); + l2_xcrw_main_t * xcm = &l2_xcrw_main; + vlib_main_t * vm = vlib_get_main (); + l2_xcrw_adjacency_t * a; + u8 * rewrite_string; + + if (t == 0) + { + s = format (s, "%-25s%s", "L2 interface", "Tunnel Details"); + return s; + } + + s = format (s, "%-25U %U ", + format_vnet_sw_if_index_name, vnm, t->l2_sw_if_index, + format_vnet_sw_if_index_name, vnm, t->tunnel_sw_if_index); + + a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index); + + s = format (s, "next %U ", + format_vlib_next_node_name, vm, l2_xcrw_node.index, + a->rewrite_header.next_index); + + if (a->rewrite_header.sw_if_index != ~0) + s = format (s, "tx fib index %d ", a->rewrite_header.sw_if_index); + + if (a->rewrite_header.data_bytes) + { + rewrite_string = (u8 *)(a + 1); + rewrite_string -= a->rewrite_header.data_bytes; + s = format (s, "rewrite data: %U ", + format_hex_bytes, rewrite_string, + a->rewrite_header.data_bytes); + } + + s = format (s, "\n"); + + return s; +} + + +static clib_error_t * +show_l2xcrw_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + l2_xcrw_main_t * xcm = &l2_xcrw_main; + l2_xcrw_tunnel_t * t; + + if (pool_elts (xcm->tunnels) == 0) + { + vlib_cli_output (vm, "No L2 / L3 rewrite cross-connects configured"); + return 0; + } + + vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0); + + pool_foreach (t, xcm->tunnels, + ({ + vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t); + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_l2xcrw_command, static) = { + .path = "show l2xcrw", + .short_help = "Display L2/L3 rewrite cross-connects", + .function = show_l2xcrw_command_fn, +}; diff --git a/vnet/vnet/l2/l2_xcrw.h 
b/vnet/vnet/l2/l2_xcrw.h new file mode 100644 index 00000000000..d32d1e8df5c --- /dev/null +++ b/vnet/vnet/l2/l2_xcrw.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_l2_xcrw_h__ +#define __included_l2_xcrw_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_output.h> +#include <vnet/api_errno.h> +#include <vnet/ethernet/ethernet.h> + +typedef struct { + /* + * Let: rewrite_header.sw_if_index = tx_fib_index or ~0. + * rewrite_header.next_index = L2_XCRW_NEXT_XXX + */ + vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); +} l2_xcrw_adjacency_t; + +typedef struct { + /* L2 interface */ + u32 l2_sw_if_index; + + /* Tunnel interface */ + u32 tunnel_sw_if_index; /* This field remains set in freed pool elts */ + +} l2_xcrw_tunnel_t; + +typedef struct { + u32 cached_next_index; + + /* Vector of cross-connect rewrites */ + l2_xcrw_adjacency_t * adj_by_sw_if_index; + + /* Pool of xcrw tunnels */ + l2_xcrw_tunnel_t * tunnels; + + /* Tunnel index by tunnel sw_if_index */ + uword * tunnel_index_by_l2_sw_if_index; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_xcrw_main_t; + +typedef enum { + L2_XCRW_NEXT_DROP, + L2_XCRW_N_NEXT, +} l2_xcrw_next_t; + +#define foreach_l2_xcrw_error \ +_(DROP, "Packets dropped") \ +_(FWD, "Packets forwarded") + +typedef enum { +#define _(sym,str) L2_XCRW_ERROR_##sym, + foreach_l2_xcrw_error +#undef _ + L2_XCRW_N_ERROR, +} l2_xcrw_error_t; + +#endif /* __included_l2_xcrw_h__ */ diff --git a/vnet/vnet/l2tp/decap.c b/vnet/vnet/l2tp/decap.c new file mode 100644 index 00000000000..1a2bc4890d0 --- /dev/null +++ b/vnet/vnet/l2tp/decap.c @@ -0,0 +1,253 @@ +/* + * decap.c : L2TPv3 tunnel decapsulation + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/l2tp/l2tp.h> + +/* Statistics (not really errors) */ +#define foreach_l2t_decap_error \ +_(USER_TO_NETWORK, "L2TP user (ip6) to L2 network pkts") \ +_(SESSION_ID_MISMATCH, "l2tpv3 local session id mismatches") \ +_(COOKIE_MISMATCH, "l2tpv3 local cookie mismatches") + +static char * l2t_decap_error_strings[] = { +#define _(sym,string) string, + foreach_l2t_decap_error +#undef _ +}; + +typedef enum { +#define _(sym,str) L2T_DECAP_ERROR_##sym, + foreach_l2t_decap_error +#undef _ + L2T_DECAP_N_ERROR, +} l2t_DECAP_error_t; + +typedef enum { + L2T_DECAP_NEXT_DROP, + L2T_DECAP_NEXT_L2_INPUT, + L2T_DECAP_N_NEXT, + /* Pseudo next index */ + L2T_DECAP_NEXT_NO_INTERCEPT = L2T_DECAP_N_NEXT, +} l2t_decap_next_t; + +#define NSTAGES 3 + +static inline void stage0 (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 buffer_index) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); + vlib_prefetch_buffer_header (b, STORE); + /* l2tpv3 header is a long way away, need 2 cache lines */ + CLIB_PREFETCH (b->data, 2*CLIB_CACHE_LINE_BYTES, STORE); +} + +static inline void stage1 (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + l2t_main_t *lm = &l2t_main; + ip6_header_t * ip6 = vlib_buffer_get_current (b); + u32 session_index; + uword *p = 0; + l2tpv3_header_t * l2t; + + /* Not L2tpv3 (0x73, 0t115)? Use the normal path. */ + if (PREDICT_FALSE(ip6->protocol != IP_PROTOCOL_L2TP)) { + vnet_buffer(b)->l2t.next_index = L2T_DECAP_NEXT_NO_INTERCEPT; + return; + } + + /* Make up your minds, people... */ + switch (lm->lookup_type) { + case L2T_LOOKUP_SRC_ADDRESS: + p = hash_get_mem (lm->session_by_src_address, &ip6->src_address); + break; + case L2T_LOOKUP_DST_ADDRESS: + p = hash_get_mem (lm->session_by_dst_address, &ip6->dst_address); + break; + case L2T_LOOKUP_SESSION_ID: + l2t = (l2tpv3_header_t*)(ip6+1); + p = hash_get (lm->session_by_session_id, l2t->session_id); + break; + default: + ASSERT(0); + } + + if (PREDICT_FALSE(p == 0)) { + vnet_buffer(b)->l2t.next_index = L2T_DECAP_NEXT_NO_INTERCEPT; + return; + } else { + session_index = p[0]; + } + + /* Remember mapping index, prefetch the mini counter */ + vnet_buffer(b)->l2t.next_index = L2T_DECAP_NEXT_L2_INPUT; + vnet_buffer(b)->l2t.session_index = session_index; + + /* $$$$$ prefetch counter */ +} + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + l2t_main_t *lm = &l2t_main; + ip6_header_t * ip6 = vlib_buffer_get_current (b); + vlib_node_t *n = vlib_get_node (vm, l2t_decap_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + l2tpv3_header_t * l2tp; + u32 counter_index; + l2t_session_t * session; + u32 session_index; + u32 next_index; + + /* Other-than-output pkt? We're done... 
*/ + if (vnet_buffer(b)->l2t.next_index != L2T_DECAP_NEXT_L2_INPUT) { + next_index = vnet_buffer(b)->l2t.next_index; + goto done; + } + + em->counters[node_counter_base_index + L2T_DECAP_ERROR_USER_TO_NETWORK] += 1; + + session_index = vnet_buffer(b)->l2t.session_index; + + counter_index = + session_index_to_counter_index (session_index, + SESSION_COUNTER_USER_TO_NETWORK); + + /* per-mapping byte stats include the ethernet header */ + vlib_increment_combined_counter (&lm->counter_main, + os_get_cpu_number(), + counter_index, + 1 /* packet_increment */, + vlib_buffer_length_in_chain (vm, b) + + sizeof (ethernet_header_t)); + + session = pool_elt_at_index (lm->sessions, session_index); + + l2tp = vlib_buffer_get_current (b) + sizeof (*ip6); + + if (PREDICT_FALSE(l2tp->session_id != session->local_session_id)) { + // Key matched but session id does not. Assume packet is not for us. + em->counters[node_counter_base_index + L2T_DECAP_ERROR_SESSION_ID_MISMATCH] += 1; + next_index = L2T_DECAP_NEXT_NO_INTERCEPT; + goto done; + } + + if (PREDICT_FALSE (l2tp->cookie != session->local_cookie[0])) { + if (l2tp->cookie != session->local_cookie[1]) { + // Key and session ID matched, but cookie doesn't. Drop this packet. + b->error = node->errors[L2T_DECAP_ERROR_COOKIE_MISMATCH]; + next_index = L2T_DECAP_NEXT_DROP; + goto done; + } + } + + vnet_buffer(b)->sw_if_index[VLIB_RX] = session->sw_if_index; + + /* strip the ip6 and L2TP header */ + vlib_buffer_advance (b, sizeof (*ip6) + session->l2tp_hdr_size); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b); + + if (PREDICT_FALSE(b->flags & VLIB_BUFFER_IS_TRACED)) { + l2t_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t)); + t->is_user_to_network = 1; + t->our_address.as_u64[0] = + ip6->dst_address.as_u64[0]; + t->our_address.as_u64[1] = + ip6->dst_address.as_u64[1]; + t->client_address.as_u64[0] = + ip6->src_address.as_u64[0]; + t->client_address.as_u64[1] = + ip6->src_address.as_u64[1]; + t->session_index = session_index; + } + + return L2T_DECAP_NEXT_L2_INPUT; + + done: + if (next_index == L2T_DECAP_NEXT_NO_INTERCEPT) { + // Go to next node on the ip6 configuration chain + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip_config_main_t * cm = &lm->rx_config_mains[VNET_UNICAST]; + ip6_l2tpv3_config_t * c0; + + vnet_get_config_data (&cm->config_main, + &vnet_buffer (b)->ip.current_config_index, + &next_index, + sizeof (c0[0])); + } + + if (PREDICT_FALSE(b->flags & VLIB_BUFFER_IS_TRACED)) { + l2t_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t)); + t->is_user_to_network = 1; + t->our_address.as_u64[0] = + ip6->dst_address.as_u64[0]; + t->our_address.as_u64[1] = + ip6->dst_address.as_u64[1]; + t->client_address.as_u64[0] = + ip6->src_address.as_u64[0]; + t->client_address.as_u64[1] = + ip6->src_address.as_u64[1]; + t->session_index = ~0; + } + return next_index; +} + +#include <vnet/pipeline.h> + +static uword l2t_decap_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + +VLIB_REGISTER_NODE (l2t_decap_node) = { + .function = l2t_decap_node_fn, + .name = "l2tp-decap", + .vector_size = sizeof (u32), + .format_trace = format_l2t_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2t_decap_error_strings), + .error_strings = l2t_decap_error_strings, + + .n_next_nodes = L2T_DECAP_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2T_DECAP_NEXT_L2_INPUT] = "l2-input", 
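    /*
     * Editorial note (illustrative, not from the original commit):
     * L2T_DECAP_NEXT_NO_INTERCEPT is deliberately absent from this table;
     * it is a pseudo next index (== L2T_DECAP_N_NEXT), and non-intercepted
     * packets pick up their real next index from the ip6 feature
     * configuration chain in last_stage() via vnet_get_config_data().
     */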
+ [L2T_DECAP_NEXT_DROP] = "error-drop", + }, +}; + +void l2tp_decap_init (void) +{ + ip6_register_protocol (IP_PROTOCOL_L2TP, l2t_decap_node.index); +} diff --git a/vnet/vnet/l2tp/encap.c b/vnet/vnet/l2tp/encap.c new file mode 100644 index 00000000000..8f26ab007dc --- /dev/null +++ b/vnet/vnet/l2tp/encap.c @@ -0,0 +1,217 @@ +/* + * encap.c : L2TPv3 tunnel encapsulation + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/l2tp/l2tp.h> + +/* Statistics (not really errors) */ +#define foreach_l2t_encap_error \ +_(NETWORK_TO_USER, "L2TP L2 network to user (ip6) pkts") \ +_(LOOKUP_FAIL_TO_L3, "L2TP L2 session lookup failed pkts") + +static char * l2t_encap_error_strings[] = { +#define _(sym,string) string, + foreach_l2t_encap_error +#undef _ +}; + +typedef enum { +#define _(sym,str) L2T_ENCAP_ERROR_##sym, + foreach_l2t_encap_error +#undef _ + L2T_ENCAP_N_ERROR, +} l2t_encap_error_t; + + +typedef enum { + L2T_ENCAP_NEXT_DROP, + L2T_ENCAP_NEXT_IP6_LOOKUP, + L2T_ENCAP_N_NEXT, +} l2t_encap_next_t; + +typedef struct { + u32 cached_session_index; + u32 cached_sw_if_index; + vnet_main_t * vnet_main; +} l2tp_encap_runtime_t; + +vlib_node_registration_t l2t_encap_node; + +#define NSTAGES 3 + +static inline void stage0 (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 buffer_index) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); + vlib_prefetch_buffer_header (b, STORE); + CLIB_PREFETCH (b->data, 2*CLIB_CACHE_LINE_BYTES, STORE); +} + +static inline void stage1 (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 bi) +{ + l2tp_encap_runtime_t * rt = (void *) node->runtime_data; + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vnet_hw_interface_t * hi; + + u32 sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_TX]; + u32 session_index = rt->cached_session_index; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index)) + { + hi = vnet_get_sup_hw_interface (rt->vnet_main, sw_if_index); + session_index = rt->cached_session_index = hi->dev_instance; + rt->cached_sw_if_index = sw_if_index; + } + + /* Remember mapping index, prefetch the mini counter */ + vnet_buffer(b)->l2t.next_index = L2T_ENCAP_NEXT_IP6_LOOKUP; + vnet_buffer(b)->l2t.session_index = session_index; + + /* $$$$ prefetch counter... */ +} + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + l2t_main_t *lm = &l2t_main; + vlib_node_t *n = vlib_get_node (vm, l2t_encap_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + l2tpv3_header_t * l2tp; + u32 session_index; + u32 counter_index; + l2t_session_t *s; + ip6_header_t *ip6; + u16 payload_length; + + /* Other-than-output pkt? We're done... 
*/ + if (vnet_buffer(b)->l2t.next_index != L2T_ENCAP_NEXT_IP6_LOOKUP) + return vnet_buffer(b)->l2t.next_index; + + /* clear so it is not interpreted as fib_index */ + vnet_buffer(b)->sw_if_index[VLIB_TX] = (u32)~0; + + em->counters[node_counter_base_index + L2T_ENCAP_ERROR_NETWORK_TO_USER] += 1; + + session_index = vnet_buffer(b)->l2t.session_index; + + counter_index = + session_index_to_counter_index (session_index, + SESSION_COUNTER_NETWORK_TO_USER); + + /* per-mapping byte stats include the ethernet header */ + vlib_increment_combined_counter (&lm->counter_main, + os_get_cpu_number(), + counter_index, + 1 /* packet_increment */, + vlib_buffer_length_in_chain (vm, b)); + + s = pool_elt_at_index (lm->sessions, session_index); + + /* Paint on an l2tpv3 hdr */ + vlib_buffer_advance (b, -(s->l2tp_hdr_size)); + l2tp = vlib_buffer_get_current (b); + + l2tp->session_id = s->remote_session_id; + l2tp->cookie = s->remote_cookie; + if (PREDICT_FALSE (s->l2_sublayer_present)) { + l2tp->l2_specific_sublayer = 0; + } + + /* Paint on an ip6 header */ + vlib_buffer_advance (b, -(sizeof (*ip6))); + ip6 = vlib_buffer_get_current (b); + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6<<28); + + /* calculate ip6 payload length */ + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip6); + + ip6->payload_length = clib_host_to_net_u16 (payload_length); + ip6->protocol = IP_PROTOCOL_L2TP; + ip6->hop_limit = 0xff; + ip6->src_address.as_u64[0] = s->our_address.as_u64[0]; + ip6->src_address.as_u64[1] = s->our_address.as_u64[1]; + ip6->dst_address.as_u64[0] = s->client_address.as_u64[0]; + ip6->dst_address.as_u64[1] = s->client_address.as_u64[1]; + + if (PREDICT_FALSE(b->flags & VLIB_BUFFER_IS_TRACED)) { + l2t_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t)); + t->is_user_to_network = 0; + t->our_address.as_u64[0] = + ip6->src_address.as_u64[0]; + t->our_address.as_u64[1] = + ip6->src_address.as_u64[1]; + t->client_address.as_u64[0] = + ip6->dst_address.as_u64[0]; + t->client_address.as_u64[1] = + ip6->dst_address.as_u64[1]; + t->session_index = session_index; + } + + return L2T_ENCAP_NEXT_IP6_LOOKUP; +} + +#include <vnet/pipeline.h> + +uword l2t_encap_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (l2t_encap_node) = { + .function = l2t_encap_node_fn, + .name = "l2tp-encap", + .vector_size = sizeof (u32), + .format_trace = format_l2t_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .runtime_data_bytes = sizeof (l2tp_encap_runtime_t), + + .n_errors = ARRAY_LEN(l2t_encap_error_strings), + .error_strings = l2t_encap_error_strings, + + .n_next_nodes = L2T_ENCAP_N_NEXT, + + // add dispositions here + .next_nodes = { + [L2T_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup", + [L2T_ENCAP_NEXT_DROP] = "error-drop", + }, +}; + +void l2tp_encap_init (vlib_main_t * vm) +{ + l2tp_encap_runtime_t * rt; + + rt = vlib_node_get_runtime_data (vm, l2t_encap_node.index); + rt->vnet_main = vnet_get_main(); + rt->cached_sw_if_index = (u32) ~0; + rt->cached_session_index = (u32) ~0; +} diff --git a/vnet/vnet/l2tp/l2tp.c b/vnet/vnet/l2tp/l2tp.c new file mode 100644 index 00000000000..da28487dd7f --- /dev/null +++ b/vnet/vnet/l2tp/l2tp.c @@ -0,0 +1,696 @@ +/* + * l2tp.c : L2TPv3 tunnel support + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/l2/l2_input.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/l2tp/l2tp.h> + +l2t_main_t l2t_main; + +/* packet trace format function */ +u8 * format_l2t_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2t_trace_t * t = va_arg (*args, l2t_trace_t *); + + if (t->is_user_to_network) + s = format (s, "L2T: %U (client) -> %U (our) session %d", + format_ip6_address, &t->client_address, + format_ip6_address, &t->our_address, + t->session_index); + else + s = format (s, "L2T: %U (our) -> %U (client) session %d)", + format_ip6_address, &t->our_address, + format_ip6_address, &t->client_address, + t->session_index); + return s; +} + +u8 * format_l2t_session (u8 * s, va_list * args) +{ + l2t_session_t * session = va_arg (*args, l2t_session_t *); + l2t_main_t * lm = &l2t_main; + u32 counter_index; + vlib_counter_t v; + + s = format (s, "[%d] %U (our) %U (client) %U (sw_if_index %d)\n", + session - lm->sessions, + format_ip6_address, &session->our_address, + format_ip6_address, &session->client_address, + format_vnet_sw_interface_name, lm->vnet_main, + vnet_get_sw_interface (lm->vnet_main, session->sw_if_index), + session->sw_if_index); + + s = format (s, " local cookies %016llx %016llx remote cookie %016llx\n", + clib_net_to_host_u64 (session->local_cookie[0]), + clib_net_to_host_u64 (session->local_cookie[1]), + clib_net_to_host_u64 (session->remote_cookie)); + + s = format (s, " local session-id %d remote session-id %d\n", + clib_net_to_host_u32 (session->local_session_id), + clib_net_to_host_u32 (session->remote_session_id)); + + s = format (s, " l2 specific sublayer %s\n", + session->l2_sublayer_present ? 
"preset" : "absent"); + + counter_index = + session_index_to_counter_index (session - lm->sessions, + SESSION_COUNTER_USER_TO_NETWORK); + + vlib_get_combined_counter (&lm->counter_main, counter_index, &v); + if (v.packets != 0) + s = format (s, " user-to-net: %llu pkts %llu bytes\n", + v.packets, v.bytes); + + vlib_get_combined_counter (&lm->counter_main, counter_index+1, &v); + + if (v.packets != 0) + s = format (s, " net-to-user: %llu pkts %llu bytes\n", + v.packets, v.bytes); + return s; +} + +static clib_error_t * +show_l2tp_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2t_session_t *session; + l2t_main_t *lm = &l2t_main; + char * keystr = 0; + int verbose = 0; + + if (unformat (input, "verbose") || unformat (input, "v")) + verbose = 1; + + if (pool_elts (lm->sessions) == 0) + vlib_cli_output (vm, "No l2tp sessions..."); + else + vlib_cli_output (vm, "%u l2tp sessions...", pool_elts (lm->sessions)); + + if (verbose) + { + switch (lm->lookup_type) + { + case L2T_LOOKUP_SRC_ADDRESS: + keystr = "src address"; + break; + + case L2T_LOOKUP_DST_ADDRESS: + keystr = "dst address"; + break; + + case L2T_LOOKUP_SESSION_ID: + keystr = "session id"; + break; + + default: + keystr = "BOGUS!"; + break; + } + + vlib_cli_output (vm, "L2tp session lookup on %s", keystr); + + pool_foreach (session, lm->sessions, + ({ + vlib_cli_output (vm, "%U", format_l2t_session, session); + })); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_session_detail_command, static) = { + .path = "show l2tpv3", + .short_help = "show l2tpv3 [verbose]", + .function = show_l2tp_command_fn, +}; + +static clib_error_t * +test_counters_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2t_session_t *session; + l2t_main_t *lm = &l2t_main; + u32 session_index; + u32 counter_index; + u32 nincr=0; + u32 cpu_index = os_get_cpu_number(); + + pool_foreach (session, lm->sessions, + ({ + session_index = session - lm->sessions; + counter_index = + session_index_to_counter_index (session_index, + SESSION_COUNTER_USER_TO_NETWORK); + vlib_increment_combined_counter (&lm->counter_main, + cpu_index, + counter_index, + 1/*pkt*/, 1111 /*bytes*/); + vlib_increment_combined_counter (&lm->counter_main, + cpu_index, + counter_index+1, + 1/*pkt*/, 2222 /*bytes*/); + nincr++; + + })); + vlib_cli_output (vm, "Incremented %d active counters\n", nincr); + + return 0; +} + +VLIB_CLI_COMMAND (test_counters_command, static) = { + .path = "test counters", + .short_help = "increment all active counters", + .function = test_counters_command_fn, +}; + +static clib_error_t * +clear_counters_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2t_session_t *session; + l2t_main_t *lm = &l2t_main; + u32 session_index; + u32 counter_index; + u32 nincr=0; + + pool_foreach (session, lm->sessions, + ({ + session_index = session - lm->sessions; + counter_index = + session_index_to_counter_index (session_index, + SESSION_COUNTER_USER_TO_NETWORK); + vlib_zero_combined_counter (&lm->counter_main, counter_index); + vlib_zero_combined_counter (&lm->counter_main, counter_index+1); + nincr++; + + })); + vlib_cli_output (vm, "Cleared %d active counters\n", nincr); + + return 0; +} + +VLIB_CLI_COMMAND (clear_counters_command, static) = { + .path = "clear counters", + .short_help = "clear all active counters", + .function = clear_counters_command_fn, +}; + +static u8 * format_l2tpv3_name (u8 * s, va_list * args) +{ + l2t_main_t *lm = &l2t_main; + u32 i = 
va_arg (*args, u32); + u32 show_dev_instance = ~0; + + if (i < vec_len (lm->dev_inst_by_real)) + show_dev_instance = lm->dev_inst_by_real[i]; + + if (show_dev_instance != ~0) + i = show_dev_instance; + + return format (s, "l2tpv3_tunnel%d", i); +} + +static int l2tpv3_name_renumber (vnet_hw_interface_t * hi, + u32 new_dev_instance) +{ + l2t_main_t *lm = &l2t_main; + + vec_validate_init_empty (lm->dev_inst_by_real, hi->dev_instance, ~0); + + lm->dev_inst_by_real [hi->dev_instance] = new_dev_instance; + + return 0; +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (l2tpv3_device_class,static) = { + .name = "L2TPv3", + .format_device_name = format_l2tpv3_name, + .name_renumber = l2tpv3_name_renumber, + .tx_function = dummy_interface_tx, +}; + +static uword dummy_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + /* + * Conundrum: packets from tun/tap destined for the tunnel + * actually have this rewrite applied. Transit packets do not. + * To make the two cases equivalent, don't generate a + * rewrite here, build the entire header in the fast path. + */ + return 0; +} + +static u8 * format_l2tp_header_with_length (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + s = format (s, "unimplemented dev %u", dev_instance); + return s; +} + +VNET_HW_INTERFACE_CLASS (l2tpv3_hw_class) = { + .name = "L2TPV3", + .format_header = format_l2tp_header_with_length, + .set_rewrite = dummy_set_rewrite, +}; + +int create_l2tpv3_ipv6_tunnel (l2t_main_t * lm, + ip6_address_t * client_address, + ip6_address_t * our_address, + u32 local_session_id, + u32 remote_session_id, + u64 local_cookie, + u64 remote_cookie, + int l2_sublayer_present, + u32 * sw_if_index) +{ + l2t_session_t *s = 0; + vnet_main_t * vnm = lm->vnet_main; + vnet_hw_interface_t * hi; + uword * p = (uword *) ~0; + u32 hw_if_index; + l2tpv3_header_t l2tp_hdr; + ip6_address_t * dst_address_copy, * src_address_copy; + u32 counter_index; + + remote_session_id = clib_host_to_net_u32 (remote_session_id); + local_session_id = clib_host_to_net_u32 (local_session_id); + + switch (lm->lookup_type) { + case L2T_LOOKUP_SRC_ADDRESS: + p = hash_get_mem (lm->session_by_src_address, client_address); + break; + + case L2T_LOOKUP_DST_ADDRESS: + p = hash_get_mem (lm->session_by_dst_address, our_address); + break; + + case L2T_LOOKUP_SESSION_ID: + p = hash_get (lm->session_by_session_id, local_session_id); + break; + + default: + ASSERT(0); + } + + /* adding a session: session must not already exist */ + if (p) + return VNET_API_ERROR_INVALID_VALUE; + + pool_get (lm->sessions, s); + memset (s, 0, sizeof (*s)); + memcpy (&s->our_address, our_address, sizeof (s->our_address)); + memcpy (&s->client_address, client_address, sizeof (s->client_address)); + s->local_cookie[0] = clib_host_to_net_u64 (local_cookie); + s->remote_cookie = clib_host_to_net_u64 (remote_cookie); + s->local_session_id = local_session_id; + s->remote_session_id = remote_session_id; + s->l2_sublayer_present = l2_sublayer_present; + /* precompute l2tp header size */ + s->l2tp_hdr_size = l2_sublayer_present ? 
+ sizeof (l2tpv3_header_t) : + sizeof (l2tpv3_header_t) - sizeof(l2tp_hdr.l2_specific_sublayer); + + /* Setup hash table entries */ + switch (lm->lookup_type) { + case L2T_LOOKUP_SRC_ADDRESS: + src_address_copy = clib_mem_alloc (sizeof (*src_address_copy)); + memcpy (src_address_copy, client_address, sizeof (*src_address_copy)); + hash_set_mem (lm->session_by_src_address, src_address_copy, + s - lm->sessions); + break; + case L2T_LOOKUP_DST_ADDRESS: + dst_address_copy = clib_mem_alloc (sizeof (*dst_address_copy)); + memcpy (dst_address_copy, our_address, sizeof (*dst_address_copy)); + hash_set_mem (lm->session_by_dst_address, dst_address_copy, + s - lm->sessions); + break; + case L2T_LOOKUP_SESSION_ID: + hash_set (lm->session_by_session_id, local_session_id, + s - lm->sessions); + break; + + default: + ASSERT(0); + } + + /* validate counters */ + counter_index = + session_index_to_counter_index (s - lm->sessions, + SESSION_COUNTER_USER_TO_NETWORK); + vlib_validate_combined_counter (&lm->counter_main, counter_index); + vlib_validate_combined_counter (&lm->counter_main, counter_index+1); + + if (vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) > 0) + { + hw_if_index = lm->free_l2tpv3_tunnel_hw_if_indices + [vec_len (lm->free_l2tpv3_tunnel_hw_if_indices)-1]; + _vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) -= 1; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = s - lm->sessions; + hi->hw_instance = hi->dev_instance; + } + else + { + hw_if_index = vnet_register_interface + (vnm, l2tpv3_device_class.index, s - lm->sessions, + l2tpv3_hw_class.index, s - lm->sessions); + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->output_node_index = l2t_encap_node.index; + /* $$$$ initialize custom dispositions, if needed */ + } + + s->hw_if_index = hw_if_index; + s->sw_if_index = hi->sw_if_index; + + if (sw_if_index) + *sw_if_index = hi->sw_if_index; + + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + return 0; +} + +static clib_error_t * +create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_address_t client_address, our_address; + unformat_input_t _line_input, * line_input = &_line_input; + l2t_main_t *lm = &l2t_main; + u64 local_cookie = (u64)~0, remote_cookie = (u64)~0; + u32 local_session_id = 1, remote_session_id = 1; + int our_address_set = 0, client_address_set = 0; + int l2_sublayer_present = 0; + int rv; + u32 sw_if_index; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "client %U", + unformat_ip6_address, &client_address)) + client_address_set = 1; + else if (unformat (line_input, "our %U", + unformat_ip6_address, &our_address)) + our_address_set = 1; + else if (unformat (line_input, "local-cookie %llx", &local_cookie)) + ; + else if (unformat (line_input, "remote-cookie %llx", &remote_cookie)) + ; + else if (unformat (line_input, "local-session-id %d", + &local_session_id)) + ; + else if (unformat (line_input, "remote-session-id %d", + &remote_session_id)) + ; + else if (unformat (line_input, "l2-sublayer-present")) + l2_sublayer_present = 1; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (our_address_set == 0) + return clib_error_return (0, "our address not specified"); + if (client_address_set == 0) + return clib_error_return (0, "client address not specified"); + + rv = create_l2tpv3_ipv6_tunnel (lm, &client_address, &our_address, + local_session_id, remote_session_id, + local_cookie, remote_cookie, + l2_sublayer_present, + &sw_if_index); + switch(rv) + { + case 0: + break; + case VNET_API_ERROR_INVALID_VALUE: + return clib_error_return (0, "session already exists..."); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "session does not exist..."); + + default: + return clib_error_return (0, "l2tp_session_add_del returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (create_l2tpv3_tunnel_command, static) = { + .path = "create l2tpv3 tunnel", + .short_help = + "create l2tpv3 tunnel client <ip6> our <ip6> local-cookie <hex> remote-cookie <hex> local-session <dec> remote-session <dec>", + .function = create_l2tpv3_tunnel_command_fn, +}; + +int l2tpv3_set_tunnel_cookies (l2t_main_t * lm, + u32 sw_if_index, + u64 new_local_cookie, + u64 new_remote_cookie) +{ + l2t_session_t *s; + vnet_hw_interface_t * hi; + vnet_main_t * vnm = vnet_get_main(); + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + + if (pool_is_free_index (lm->sessions, hi->dev_instance)) + return VNET_API_ERROR_INVALID_VALUE; + + s = pool_elt_at_index (lm->sessions, hi->dev_instance); + + s->local_cookie[1] = s->local_cookie[0]; + s->local_cookie[0] = clib_host_to_net_u64(new_local_cookie); + s->remote_cookie = clib_host_to_net_u64(new_remote_cookie); + + return 0; +} + + +static clib_error_t * +set_l2tp_tunnel_cookie_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2t_main_t *lm = &l2t_main; + vnet_main_t * vnm = vnet_get_main(); + u32 sw_if_index = ~0; + u64 local_cookie = (u64)~0, remote_cookie = (u64)~0; + + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else if (unformat (input, "local %llx", &local_cookie)) + ; + else if (unformat (input, "remote %llx", &remote_cookie)) + ; + else + break; + } + if (sw_if_index == ~0) + return clib_error_return (0, "unknown interface"); + if (local_cookie == ~0) + return clib_error_return (0, "local cookie required"); + if (remote_cookie == ~0) + return clib_error_return (0, "remote cookie required"); + + rv = l2tpv3_set_tunnel_cookies (lm, sw_if_index, + local_cookie, remote_cookie); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "invalid 
interface"); + + default: + return clib_error_return (0, "l2tp_session_set_cookies returned %d", + rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (set_l2tp_tunnel_cookie_command, static) = { + .path = "set l2tpv3 tunnel cookie", + .short_help = + "set l2tpv3 tunnel cookie <intfc> local <hex> remote <hex>", + .function = set_l2tp_tunnel_cookie_command_fn, +}; + +int l2tpv3_interface_enable_disable (vnet_main_t * vnm, + u32 sw_if_index, + int enable_disable) +{ + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip_config_main_t * rx_cm = &lm->rx_config_mains[VNET_UNICAST]; + u32 ci; + ip6_l2tpv3_config_t config; + ip4_rx_feature_type_t type; + + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + type = IP6_RX_FEATURE_L2TPV3; + + ci = rx_cm->config_index_by_sw_if_index[sw_if_index]; + ci = (enable_disable + ? vnet_config_add_feature + : vnet_config_del_feature) + (vlib_get_main(), &rx_cm->config_main, + ci, + type, + &config, + sizeof (config)); + rx_cm->config_index_by_sw_if_index[sw_if_index] = ci; + return 0; +} + +/* Enable/disable L2TPv3 intercept on IP6 fowarding path */ +static clib_error_t * +set_ip6_l2tpv3 (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 sw_if_index = ~0; + int is_add = 1; + int rv; + vnet_main_t * vnm = vnet_get_main(); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else if (unformat (input, "del")) + is_add = 0; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "interface required"); + + rv = l2tpv3_interface_enable_disable (vnm, sw_if_index, is_add); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "invalid interface"); + + default: + return clib_error_return (0, "l2tp_interface_enable_disable returned %d", + rv); + } + return 0; +} + +VLIB_CLI_COMMAND (set_interface_ip6_l2tpv3, static) = { + .path = "set interface ip6 l2tpv3", + .function = set_ip6_l2tpv3, + .short_help = "set interface ip6 l2tpv3 <intfc> [del]", +}; + +static clib_error_t * +l2tp_config (vlib_main_t * vm, unformat_input_t * input) +{ + l2t_main_t *lm = &l2t_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "lookup-v6-src")) + lm->lookup_type = L2T_LOOKUP_SRC_ADDRESS; + else if (unformat (input, "lookup-v6-dst")) + lm->lookup_type = L2T_LOOKUP_DST_ADDRESS; + else if (unformat (input, "lookup-session-id")) + lm->lookup_type = L2T_LOOKUP_SESSION_ID; + else return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (l2tp_config, "l2tp"); + +clib_error_t *l2tp_init (vlib_main_t *vm) +{ + l2t_main_t *lm = &l2t_main; + ip_main_t * im = &ip_main; + ip_protocol_info_t * pi; + + lm->vnet_main = vnet_get_main(); + lm->vlib_main = vm; + lm->lookup_type = L2T_LOOKUP_DST_ADDRESS; + + lm->session_by_src_address = hash_create_mem + (0, sizeof (ip6_address_t) /* key bytes */, + sizeof (u32) /* value bytes */); + lm->session_by_dst_address = hash_create_mem + (0, sizeof (ip6_address_t) /* key bytes */, + sizeof (u32) /* value bytes */); + lm->session_by_session_id = hash_create (0, sizeof (uword)); + + pi = ip_get_protocol_info (im, IP_PROTOCOL_L2TP); + pi->unformat_pg_edit = unformat_pg_l2tp_header; + + /* insure these nodes are included in build */ + l2tp_encap_init(vm); 
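  /*
   * Editorial note (illustrative, not from the original commit):
   * l2tp_decap_init() below also calls ip6_register_protocol() so that
   * IP protocol 115 (L2TP) packets reach the l2tp-decap node; that node's
   * stage1() then consults whichever of the three hash tables created
   * above is selected by lm->lookup_type (source address, destination
   * address, or local session id).
   */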
+ l2tp_decap_init(); + + return 0; +} + +VLIB_INIT_FUNCTION(l2tp_init); + diff --git a/vnet/vnet/l2tp/l2tp.h b/vnet/vnet/l2tp/l2tp.h new file mode 100644 index 00000000000..9e7ac131a7f --- /dev/null +++ b/vnet/vnet/l2tp/l2tp.h @@ -0,0 +1,131 @@ +/* + * l2tp.h : L2TPv3 tunnel support + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_l2tp_h__ +#define __included_l2tp_h__ + +#include <vlib/vlib.h> +#include <vnet/ip/ip.h> +#include <vnet/l2tp/packet.h> + +typedef struct { + /* ip6 addresses */ + ip6_address_t our_address; + ip6_address_t client_address; + + /* $$$$ maybe add encap-path lookup fib ID? */ + + /* l2tpv3 header parameters */ + u64 local_cookie[2]; + u64 remote_cookie; + u32 local_session_id; + u32 remote_session_id; + + /* tunnel interface */ + u32 hw_if_index; + u32 sw_if_index; + + u8 l2tp_hdr_size; + u8 l2_sublayer_present; + u8 cookie_flags; /* in host byte order */ +} l2t_session_t; + +typedef enum { + L2T_LOOKUP_SRC_ADDRESS = 0, + L2T_LOOKUP_DST_ADDRESS, + L2T_LOOKUP_SESSION_ID, +} ip6_to_l2_lookup_t; + +typedef struct { + /* session pool */ + l2t_session_t *sessions; + + /* ip6 -> l2 hash tables. Make up your minds, people... */ + uword *session_by_src_address; + uword *session_by_dst_address; + uword *session_by_session_id; + + ip6_to_l2_lookup_t lookup_type; + + /* Counters */ + vlib_combined_counter_main_t counter_main; + + /* vector of free l2tpv3 tunnel interfaces */ + u32 * free_l2tpv3_tunnel_hw_if_indices; + + /* show device instance by real device instance */ + u32 * dev_inst_by_real; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + +} l2t_main_t; + +/* Packet trace structure */ +typedef struct { + int is_user_to_network; + u32 session_index; + ip6_address_t our_address; + ip6_address_t client_address; +} l2t_trace_t; + +l2t_main_t l2t_main; +vlib_node_registration_t l2t_encap_node; +vlib_node_registration_t l2t_decap_node; + +enum { + SESSION_COUNTER_USER_TO_NETWORK=0, + SESSION_COUNTER_NETWORK_TO_USER, +}; + +static inline u32 session_index_to_counter_index (u32 session_index, + u32 counter_id) +{ + return ((session_index << 1) + counter_id); +} + +u8 * format_l2t_trace (u8 * s, va_list * args); + +typedef struct { + // Any per-interface config would go here +} ip6_l2tpv3_config_t; + +uword unformat_pg_l2tp_header (unformat_input_t * input, va_list * args); + +void l2tp_encap_init (vlib_main_t *vm); +void l2tp_decap_init (void); +int create_l2tpv3_ipv6_tunnel (l2t_main_t * lm, + ip6_address_t * client_address, + ip6_address_t * our_address, + u32 local_session_id, + u32 remote_session_id, + u64 local_cookie, + u64 remote_cookie, + int l2_sublayer_present, + u32 * sw_if_index); + +int l2tpv3_set_tunnel_cookies (l2t_main_t * lm, + u32 sw_if_index, + u64 new_local_cookie, + u64 new_remote_cookie); + +int l2tpv3_interface_enable_disable (vnet_main_t * vnm, + u32 sw_if_index, + int enable_disable); + +#endif /* __included_l2tp_h__ */ diff --git 
a/vnet/vnet/l2tp/packet.h b/vnet/vnet/l2tp/packet.h new file mode 100644 index 00000000000..88acba41d4b --- /dev/null +++ b/vnet/vnet/l2tp/packet.h @@ -0,0 +1,33 @@ +/* + * packet.h : L2TPv3 packet header format + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_l2tp_packet_h__ +#define __included_l2tp_packet_h__ + +/* + * See RFC4719 for packet format. + * Note: the l2_specific_sublayer is present in current Linux l2tpv3 + * tunnels. It is not present in IOS XR l2tpv3 tunnels. + * The Linux implementation is almost certainly wrong. + */ +typedef CLIB_PACKED(struct { + u32 session_id; + u64 cookie; + u32 l2_specific_sublayer; /* set to 0 (if present) */ +}) l2tpv3_header_t; + +#endif /* __included_l2tp_packet_h__ */ diff --git a/vnet/vnet/l2tp/pg.c b/vnet/vnet/l2tp/pg.c new file mode 100644 index 00000000000..394e14681b1 --- /dev/null +++ b/vnet/vnet/l2tp/pg.c @@ -0,0 +1,94 @@ +/* + * pg.c: packet generator for L2TPv3 header + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/l2tp/l2tp.h> + +typedef struct { + pg_edit_t session_id; + pg_edit_t cookie; +} pg_l2tp_header_t; + +typedef struct { + pg_edit_t l2_sublayer; +} pg_l2tp_header_l2_sublayer_t; + +static inline void +pg_l2tp_header_init (pg_l2tp_header_t * e) +{ + pg_edit_init (&e->session_id, l2tpv3_header_t, session_id); + pg_edit_init (&e->cookie, l2tpv3_header_t, cookie); +} + +uword +unformat_pg_l2tp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_l2tp_header_t * h; + u32 group_index, error; + vlib_main_t * vm = vlib_get_main(); + + h = pg_create_edit_group (s, sizeof (h[0]), + sizeof (l2tpv3_header_t) - sizeof(u32), + &group_index); + pg_l2tp_header_init (h); + + error = 1; + + // session id and cookie are required + if (! unformat (input, "L2TP: session_id %U cookie %U", + unformat_pg_edit, unformat_pg_number, &h->session_id, + unformat_pg_edit, unformat_pg_number, &h->cookie)) { + goto done; + } + + // "l2_sublayer <value>" is optional + if (unformat (input, "l2_sublayer")) { + pg_l2tp_header_l2_sublayer_t * h2; + + h2 = pg_add_edits (s, sizeof (h2[0]), sizeof(u32), group_index); + pg_edit_init (&h2->l2_sublayer, l2tpv3_header_t, l2_specific_sublayer); + if (! 
unformat_user (input, unformat_pg_edit, + unformat_pg_number, &h2->l2_sublayer)) { + goto done; + } + } + + // Parse an ethernet header if it is present + { + pg_node_t * pg_node = 0; + vlib_node_t * eth_lookup_node; + + eth_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ethernet-input"); + ASSERT (eth_lookup_node); + + pg_node = pg_get_node (eth_lookup_node->index); + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + } + + error = 0; + +done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/l3_types.h b/vnet/vnet/l3_types.h new file mode 100644 index 00000000000..2902072148c --- /dev/null +++ b/vnet/vnet/l3_types.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * l3_types.h: layer 3 packet types + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_l3_types_h +#define included_vnet_l3_types_h + +/* Inherit generic L3 packet types from ethernet. */ +typedef enum { +#define ethernet_type(n,f) VNET_L3_PACKET_TYPE_##f, +#include <vnet/ethernet/types.def> +#undef ethernet_type +} vnet_l3_packet_type_t; + +#endif /* included_vnet_l3_types_h */ diff --git a/vnet/vnet/lawful-intercept/lawful_intercept.c b/vnet/vnet/lawful-intercept/lawful_intercept.c new file mode 100644 index 00000000000..bd3f33efa0c --- /dev/null +++ b/vnet/vnet/lawful-intercept/lawful_intercept.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/lawful-intercept/lawful_intercept.h> + +static clib_error_t * +set_li_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + li_main_t * lm = &li_main; + ip4_address_t collector; + u8 collector_set = 0; + ip4_address_t src; + u8 src_set = 0; + u32 tmp; + u16 udp_port = 0; + u8 is_add = 1; + int i; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "collector %U", unformat_ip4_address, &collector)) + collector_set = 1; + if (unformat (input, "src %U", unformat_ip4_address, &src)) + src_set = 1; + else if (unformat (input, "udp-port %d", &tmp)) + udp_port = tmp; + else if (unformat (input, "del")) + is_add = 0; + else + break; + } + + if (collector_set == 0) + return clib_error_return (0, "collector must be set..."); + if (src_set == 0) + return clib_error_return (0, "src must be set..."); + if (udp_port == 0) + return clib_error_return (0, "udp-port must be set..."); + + if (is_add == 1) + { + for (i = 0; i < vec_len (lm->collectors); i++) + { + if (lm->collectors[i].as_u32 == collector.as_u32) + { + if (lm->ports[i] == udp_port) + return clib_error_return + (0, "collector %U:%d already configured", + &collector, udp_port); + else + return clib_error_return + (0, "collector %U already configured with port %d", + &collector, (int)(lm->ports[i])); + } + } + vec_add1 (lm->collectors, collector); + vec_add1 (lm->ports, udp_port); + vec_add1 (lm->src_addrs, src); + return 0; + } + else + { + for (i = 0; i < vec_len (lm->collectors); i++) + { + if ((lm->collectors[i].as_u32 == collector.as_u32) + && lm->ports[i] == udp_port) + { + vec_delete (lm->collectors, 1, i); + vec_delete (lm->ports, 1, i); + vec_delete (lm->src_addrs, 1, i); + return 0; + } + } + return clib_error_return (0, "collector %U:%d not configured", + &collector, udp_port); + } + return 0; +} + +VLIB_CLI_COMMAND (set_li_command, static) = { + .path = "set li", + .short_help = + "set li src <ip4-address> collector <ip4-address> udp-port <nnnn>", + .function = set_li_command_fn, +}; + +static clib_error_t * +li_init (vlib_main_t * vm) +{ + li_main_t * lm = &li_main; + + lm->vlib_main = vm; + lm->vnet_main = vnet_get_main(); + lm->hit_node_index = li_hit_node.index; + return 0; +} + +VLIB_INIT_FUNCTION(li_init); diff --git a/vnet/vnet/lawful-intercept/lawful_intercept.h b/vnet/vnet/lawful-intercept/lawful_intercept.h new file mode 100644 index 00000000000..f6cbf66e076 --- /dev/null +++ b/vnet/vnet/lawful-intercept/lawful_intercept.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __lawful_intercept_h__ +#define __lawful_intercept_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/dpdk_replication.h> + +typedef struct { + /* LI collector info */ + ip4_address_t * src_addrs; + ip4_address_t * collectors; + u16 * ports; + + /* Hit node index */ + u32 hit_node_index; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} li_main_t; + +li_main_t li_main; + +typedef CLIB_PACKED(struct { + ip4_header_t ip4; + udp_header_t udp; +}) ip4_udp_header_t; + +vlib_node_registration_t li_hit_node; + +#endif /* __lawful_intercept_h__ */ diff --git a/vnet/vnet/lawful-intercept/node.c b/vnet/vnet/lawful-intercept/node.c new file mode 100644 index 00000000000..cc066491116 --- /dev/null +++ b/vnet/vnet/lawful-intercept/node.c @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> + +#include <vnet/lawful-intercept/lawful_intercept.h> + +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +vlib_node_registration_t li_hit_node; + +typedef struct { + u32 next_index; +} li_hit_trace_t; + +/* packet trace format function */ +static u8 * format_li_hit_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + li_hit_trace_t * t = va_arg (*args, li_hit_trace_t *); + + s = format (s, "LI_HIT: next index %d", t->next_index); + + return s; +} + +vlib_node_registration_t li_hit_node; + +#define foreach_li_hit_error \ +_(HITS, "LI packets processed") \ +_(NO_COLLECTOR, "No collector configured") + +typedef enum { +#define _(sym,str) LI_HIT_ERROR_##sym, + foreach_li_hit_error +#undef _ + LI_HIT_N_ERROR, +} li_hit_error_t; + +static char * li_hit_error_strings[] = { +#define _(sym,string) string, + foreach_li_hit_error +#undef _ +}; + +typedef enum { + LI_HIT_NEXT_ETHERNET, + LI_HIT_N_NEXT, +} li_hit_next_t; + +static uword +li_hit_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + li_hit_next_t next_index; + vlib_frame_t * int_frame = 0; + u32 * to_int_next = 0; + li_main_t * lm = &li_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (PREDICT_FALSE (vec_len (lm->collectors) == 0)) + { + vlib_node_increment_counter (vm, li_hit_node.index, + LI_HIT_ERROR_NO_COLLECTOR, + n_left_from); + } + else + { + /* The intercept frame... 
*/ + int_frame = vlib_get_frame_to_node (vm, ip4_lookup_node.index); + to_int_next = vlib_frame_vector_args (int_frame); + } + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 next0 = LI_HIT_NEXT_INTERFACE_OUTPUT; + u32 next1 = LI_HIT_NEXT_INTERFACE_OUTPUT; + u32 sw_if_index0, sw_if_index1; + u8 tmp0[6], tmp1[6]; + ethernet_header_t *en0, *en1; + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* $$$$$ Dual loop: process 2 x packets here $$$$$ */ + ASSERT (b0->current_data == 0); + ASSERT (b1->current_data == 0); + + en0 = vlib_buffer_get_current (b0); + en1 = vlib_buffer_get_current (b1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* Send pkt back out the RX interface */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sw_if_index0; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = sw_if_index1; + + /* $$$$$ End of processing 2 x packets $$$$$ */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + li_hit_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + li_hit_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif /* $$$ dual-loop off */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + vlib_buffer_t * c0; + ip4_udp_header_t * iu0; + ip4_header_t * ip0; + udp_header_t * udp0; + u32 next0 = LI_HIT_NEXT_ETHERNET; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + if (PREDICT_TRUE(to_int_next != 0)) + { + /* Make an intercept copy */ + c0 = vlib_dpdk_clone_buffer (vm, b0); + + vlib_buffer_advance(c0, -sizeof(*iu0)); + + iu0 = vlib_buffer_get_current(c0); + ip0 = &iu0->ip4; + + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_UDP; + + ip0->src_address.as_u32 = lm->src_addrs[0].as_u32; + ip0->dst_address.as_u32 = lm->collectors[0].as_u32; + ip0->length = vlib_buffer_length_in_chain (vm, c0); + ip0->checksum = ip4_header_checksum (ip0); + + udp0 = &iu0->udp; + udp0->src_port = udp0->dst_port = + clib_host_to_net_u16(lm->ports[0]); + udp0->checksum = 0; + udp0->length = + clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm , b0)); + + to_int_next [0] = vlib_get_buffer_index (vm, c0); + to_int_next++; + } + + if 
(PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + li_hit_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + if (int_frame) + { + int_frame->n_vectors = frame->n_vectors; + vlib_put_frame_to_node (vm, ip4_lookup_node.index, int_frame); + } + + vlib_node_increment_counter (vm, li_hit_node.index, + LI_HIT_ERROR_HITS, frame->n_vectors); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (li_hit_node) = { + .function = li_hit_node_fn, + .name = "li-hit", + .vector_size = sizeof (u32), + .format_trace = format_li_hit_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(li_hit_error_strings), + .error_strings = li_hit_error_strings, + + .n_next_nodes = LI_HIT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [LI_HIT_NEXT_ETHERNET] = "ethernet-input-not-l2", + }, +}; diff --git a/vnet/vnet/lisp-gpe/decap.c b/vnet/vnet/lisp-gpe/decap.c new file mode 100644 index 00000000000..e10f1f2e399 --- /dev/null +++ b/vnet/vnet/lisp-gpe/decap.c @@ -0,0 +1,298 @@ +/* + * decap.c: lisp-gpe decap processing + * + * Copyright (c) 2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/lisp-gpe/lisp_gpe.h> + +typedef struct { + u32 next_index; + u32 tunnel_index; + u32 error; + lisp_gpe_header_t h; +} lisp_gpe_rx_trace_t; + +static u8 * format_lisp_gpe_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + lisp_gpe_rx_trace_t * t = va_arg (*args, lisp_gpe_rx_trace_t *); + + if (t->tunnel_index != ~0) + { + s = format (s, "NSH-VXLAN: tunnel %d next %d error %d", t->tunnel_index, + t->next_index, t->error); + } + else + { + s = format (s, "NSH-VXLAN: no tunnel next %d error %d\n", t->next_index, + t->error); + } + s = format (s, "\n %U", format_lisp_gpe_header_with_length, &t->h, + (u32) sizeof (t->h) /* max size */); + return s; +} + +static uword +lisp_gpe_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + lisp_gpe_main_t * ngm = &lisp_gpe_main; + u32 last_tunnel_index = ~0; + lisp_gpe_tunnel_key_t last_key; + u32 pkts_decapsulated = 0; + + memset (&last_key, 0xff, sizeof (last_key)); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + nsh_unicast_header_t * h0, * h1; + u32 label0, label1; + u32 next0, next1; + uword * p0, * p1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + next0 = next1 = LISP_GPE_INPUT_NEXT_IP4_INPUT; + + label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); + label1 = clib_net_to_host_u32 (h1->label_exp_s_ttl); + + /* + * Translate label contents into a fib index. 
+ * This is a decent sanity check, and guarantees + * a sane FIB for the downstream lookup + */ + label0 = vnet_nsh_uc_get_label (label0); + label1 = vnet_nsh_uc_get_label (label1); + + /* If 2xlabels match, and match the 1-wide cache, use it */ + if (label0 == label1 && rt->last_label == label0) + { + vnet_buffer(b0)->sw_if_index[VLIB_TX] = rt->last_fib_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = rt->last_fib_index; + } + else + { + p0 = hash_get (rt->mm->fib_index_by_nsh_label, label0); + if (PREDICT_FALSE (p0 == 0)) + { + next0 = LISP_GPE_INPUT_NEXT_DROP; + b0->error = node->errors[NSH_ERROR_BAD_LABEL]; + } + else + vnet_buffer(b0)->sw_if_index[VLIB_TX] = p0[0]; + + p1 = hash_get (rt->mm->fib_index_by_nsh_label, label1); + if (PREDICT_FALSE (p1 == 0)) + { + next1 = LISP_GPE_INPUT_NEXT_DROP; + b1->error = node->errors[NSH_ERROR_BAD_LABEL]; + } + else + { + vnet_buffer(b1)->sw_if_index[VLIB_TX] = p1[0]; + rt->last_fib_index = p1[0]; + rt->last_label = label1; + } + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->label_exp_s_ttl = label0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->label_exp_s_ttl = label1; + } + + vlib_buffer_advance (b0, sizeof (*h0)); + vlib_buffer_advance (b1, sizeof (*h1)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + ip4_udp_lisp_gpe_header_t * iul0; + uword * p0; + u32 tunnel_index0; + lisp_gpe_tunnel_t * t0; + lisp_gpe_tunnel_key_t key0; + u32 error0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* udp leaves current_data pointing at the lisp header */ + vlib_buffer_advance + (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + + iul0 = vlib_buffer_get_current (b0); + + /* pop (ip, udp, lisp-gpe) */ + vlib_buffer_advance (b0, sizeof (*iul0)); + + tunnel_index0 = ~0; + error0 = 0; + next0 = LISP_GPE_INPUT_NEXT_DROP; + + key0.src = iul0->ip4.src_address.as_u32; + key0.iid = iul0->lisp.iid; + + if (PREDICT_FALSE ((key0.as_u64[0] != last_key.as_u64[0]))) + { + p0 = hash_get_mem (ngm->lisp_gpe_tunnel_by_key, &key0); + + if (p0 == 0) + { + error0 = LISP_GPE_ERROR_NO_SUCH_TUNNEL; + goto trace0; + } + + last_key.as_u64[0] = key0.as_u64[0]; + tunnel_index0 = last_tunnel_index = p0[0]; + } + else + tunnel_index0 = last_tunnel_index; + + t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0); + + next0 = t0->decap_next_index; + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b0); + + /* + * ip[46] lookup in the configured FIB + * lisp-gpe-encap, here's the encap tunnel sw_if_index + */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index; + pkts_decapsulated ++; + + trace0: + b0->error = error0 ? 
node->errors[error0] : 0; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lisp_gpe_rx_trace_t *tr + = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = tunnel_index0; + tr->h = iul0->lisp; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, lisp_gpe_input_node.index, + LISP_GPE_ERROR_DECAPSULATED, + pkts_decapsulated); + return from_frame->n_vectors; +} + +static char * lisp_gpe_error_strings[] = { +#define lisp_gpe_error(n,s) s, +#include <vnet/lisp-gpe/lisp_gpe_error.def> +#undef lisp_gpe_error +#undef _ +}; + +VLIB_REGISTER_NODE (lisp_gpe_input_node) = { + .function = lisp_gpe_input, + .name = "lisp-gpe-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = LISP_GPE_N_ERROR, + .error_strings = lisp_gpe_error_strings, + + .n_next_nodes = LISP_GPE_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [LISP_GPE_INPUT_NEXT_##s] = n, + foreach_lisp_gpe_input_next +#undef _ + }, + + .format_buffer = format_lisp_gpe_header_with_length, + .format_trace = format_lisp_gpe_rx_trace, + // $$$$ .unformat_buffer = unformat_lisp_gpe_header, +}; diff --git a/vnet/vnet/lisp-gpe/encap.c b/vnet/vnet/lisp-gpe/encap.c new file mode 100644 index 00000000000..b3a52c464be --- /dev/null +++ b/vnet/vnet/lisp-gpe/encap.c @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/lisp-gpe/lisp_gpe.h> + +/* Statistics (not really errors) */ +#define foreach_lisp_gpe_encap_error \ +_(ENCAPSULATED, "good packets encapsulated") + +static char * lisp_gpe_encap_error_strings[] = { +#define _(sym,string) string, + foreach_lisp_gpe_encap_error +#undef _ +}; + +typedef enum { +#define _(sym,str) LISP_GPE_ENCAP_ERROR_##sym, + foreach_lisp_gpe_encap_error +#undef _ + LISP_GPE_ENCAP_N_ERROR, +} lisp_gpe_encap_error_t; + +typedef enum { + LISP_GPE_ENCAP_NEXT_IP4_LOOKUP, + LISP_GPE_ENCAP_NEXT_DROP, + LISP_GPE_ENCAP_N_NEXT, +} lisp_gpe_encap_next_t; + +typedef struct { + u32 tunnel_index; +} lisp_gpe_encap_trace_t; + +u8 * format_lisp_gpe_encap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + lisp_gpe_encap_trace_t * t + = va_arg (*args, lisp_gpe_encap_trace_t *); + + s = format (s, "LISP-GPE-ENCAP: tunnel %d", t->tunnel_index); + return s; +} + +#define foreach_fixed_header_offset \ +_(0) _(1) _(2) _(3) + +static uword +lisp_gpe_encap (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + lisp_gpe_main_t * ngm = &lisp_gpe_main; + vnet_main_t * vnm = ngm->vnet_main; + u32 pkts_encapsulated = 0; + u16 old_l0 = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + nsh_unicast_header_t * h0, * h1; + u32 label0, label1; + u32 next0, next1; + uword * p0, * p1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + next0 = next1 = NSH_INPUT_NEXT_IP4_INPUT; + + label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); + label1 = clib_net_to_host_u32 (h1->label_exp_s_ttl); + + /* + * Translate label contents into a fib index. 
+ * This is a decent sanity check, and guarantees + * a sane FIB for the downstream lookup + */ + label0 = vnet_nsh_uc_get_label (label0); + label1 = vnet_nsh_uc_get_label (label1); + + /* If 2xlabels match, and match the 1-wide cache, use it */ + if (label0 == label1 && rt->last_label == label0) + { + vnet_buffer(b0)->sw_if_index[VLIB_TX] = rt->last_fib_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = rt->last_fib_index; + } + else + { + p0 = hash_get (rt->mm->fib_index_by_nsh_label, label0); + if (PREDICT_FALSE (p0 == 0)) + { + next0 = NSH_INPUT_NEXT_DROP; + b0->error = node->errors[NSH_ERROR_BAD_LABEL]; + } + else + vnet_buffer(b0)->sw_if_index[VLIB_TX] = p0[0]; + + p1 = hash_get (rt->mm->fib_index_by_nsh_label, label1); + if (PREDICT_FALSE (p1 == 0)) + { + next1 = NSH_INPUT_NEXT_DROP; + b1->error = node->errors[NSH_ERROR_BAD_LABEL]; + } + else + { + vnet_buffer(b1)->sw_if_index[VLIB_TX] = p1[0]; + rt->last_fib_index = p1[0]; + rt->last_label = label1; + } + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->label_exp_s_ttl = label0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->label_exp_s_ttl = label1; + } + + vlib_buffer_advance (b0, sizeof (*h0)); + vlib_buffer_advance (b1, sizeof (*h1)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = LISP_GPE_ENCAP_NEXT_IP4_LOOKUP; + vnet_hw_interface_t * hi0; + ip4_header_t * ip0; + udp_header_t * udp0; + u64 * copy_src0, * copy_dst0; + u32 * copy_src_last0, * copy_dst_last0; + lisp_gpe_tunnel_t * t0; + u16 new_l0; + ip_csum_t sum0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* 1-wide cache? */ + hi0 = vnet_get_sup_hw_interface + (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]); + + t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance); + + ASSERT(vec_len(t0->rewrite) >= 24); + + /* Apply the rewrite string. $$$$ vnet_rewrite? */ + vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); + + ip0 = vlib_buffer_get_current(b0); + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip0; + copy_src0 = (u64 *) t0->rewrite; + + ASSERT (sizeof (ip4_udp_lisp_gpe_header_t) == 36); + + /* Copy first 32 octets 8-bytes at a time */ +#define _(offs) copy_dst0[offs] = copy_src0[offs]; + foreach_fixed_header_offset; +#undef _ + /* Last 4 octets. 
Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[4]);
+ copy_src_last0 = (u32 *)(&copy_src0[4]);
+
+ copy_dst_last0[0] = copy_src_last0[0];
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+ ip0->length = new_l0;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *)(ip0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+
+ udp0->length = new_l0;
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ pkts_encapsulated ++;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lisp_gpe_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, node->node_index,
+ LISP_GPE_ENCAP_ERROR_ENCAPSULATED,
+ pkts_encapsulated);
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (lisp_gpe_encap_node) = {
+ .function = lisp_gpe_encap,
+ .name = "lisp-gpe-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_gpe_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(lisp_gpe_encap_error_strings),
+ .error_strings = lisp_gpe_encap_error_strings,
+
+ .n_next_nodes = LISP_GPE_ENCAP_N_NEXT,
+
+ .next_nodes = {
+ [LISP_GPE_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [LISP_GPE_ENCAP_NEXT_DROP] = "error-drop",
+ },
+}; diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.c b/vnet/vnet/lisp-gpe/lisp_gpe.c new file mode 100644 index 00000000000..eb4ca919b20 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe.c @@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include <vnet/lisp-gpe/lisp_gpe.h> + +lisp_gpe_main_t lisp_gpe_main; + +static u8 * format_decap_next (u8 * s, va_list * args) +{ + u32 next_index = va_arg (*args, u32); + + switch (next_index) + { + case LISP_GPE_INPUT_NEXT_DROP: + return format (s, "drop"); + case LISP_GPE_INPUT_NEXT_IP4_INPUT: + return format (s, "ip4"); + case LISP_GPE_INPUT_NEXT_IP6_INPUT: + return format (s, "ip6"); + case LISP_GPE_INPUT_NEXT_LISP_GPE_ENCAP: + return format (s, "nsh-lisp-gpe"); + default: + return format (s, "unknown %d", next_index); + } + return s; +} + +u8 * format_lisp_gpe_tunnel (u8 * s, va_list * args) +{ + lisp_gpe_tunnel_t * t = va_arg (*args, lisp_gpe_tunnel_t *); + lisp_gpe_main_t * ngm = &lisp_gpe_main; + + s = format (s, + "[%d] %U (src) %U (dst) fibs: encap %d, decap %d", + t - ngm->tunnels, + format_ip4_address, &t->src, + format_ip4_address, &t->dst, + t->encap_fib_index, + t->decap_fib_index); + + s = format (s, " decap next %U\n", format_decap_next, t->decap_next_index); + s = format (s, "lisp ver %d ", (t->ver_res>>6)); + +#define _(n,v) if (t->flags & v) s = format (s, "%s-bit ", #n); + foreach_lisp_gpe_flag_bit; +#undef _ + + s = format (s, "next_protocol %d ver_res %x res %x\n", + t->next_protocol, t->ver_res, t->res); + + s = format (s, "iid %d (0x%x)\n", t->iid, t->iid); + return s; +} + +static u8 * format_lisp_gpe_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "lisp_gpe_tunnel%d", dev_instance); +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (lisp_gpe_device_class,static) = { + .name = "LISP_GPE", + .format_device_name = format_lisp_gpe_name, + .format_tx_trace = format_lisp_gpe_encap_trace, + .tx_function = dummy_interface_tx, +}; + +static uword dummy_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + return 0; +} + +u8 * format_lisp_gpe_header_with_length (u8 * s, va_list * args) +{ + lisp_gpe_header_t * h = va_arg (*args, lisp_gpe_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "gre-nsh header truncated"); + + s = format (s, "flags: "); +#define _(n,v) if (h->flags & v) s = format (s, "%s ", #n); + foreach_lisp_gpe_flag_bit; +#undef _ + + s = format (s, "\n ver_res %d res %d next_protocol %d iid %d(%x)", + h->ver_res, h->res, h->next_protocol, + clib_net_to_host_u32 (h->iid), + clib_net_to_host_u32 (h->iid)); + return s; +} + +VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = { + .name = "LISP_GPE", + .format_header = format_lisp_gpe_header_with_length, + .set_rewrite = dummy_set_rewrite, +}; + +#define foreach_copy_field \ +_(src.as_u32) \ +_(dst.as_u32) \ +_(encap_fib_index) \ +_(decap_fib_index) \ +_(decap_next_index) \ +_(flags) \ +_(next_protocol) \ +_(ver_res) \ +_(res) \ +_(iid) + +static int lisp_gpe_rewrite (lisp_gpe_tunnel_t * t) +{ + u8 *rw = 0; + ip4_header_t * ip0; + lisp_gpe_header_t * lisp0; + ip4_udp_lisp_gpe_header_t * h0; + int len; + + len = sizeof (*h0); + + vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES); + + h0 = (ip4_udp_lisp_gpe_header_t *) rw; + + /* Fixed portion of the (outer) ip4 header */ + ip0 = &h0->ip4; + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + 
ip0->protocol = IP_PROTOCOL_UDP; + + /* we fix up the ip4 header length and checksum after-the-fact */ + ip0->src_address.as_u32 = t->src.as_u32; + ip0->dst_address.as_u32 = t->dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + + /* UDP header, randomize src port on something, maybe? */ + h0->udp.src_port = clib_host_to_net_u16 (4341); + h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe); + + /* LISP-gpe header */ + lisp0 = &h0->lisp; + + lisp0->flags = t->flags; + lisp0->ver_res = t->ver_res; + lisp0->res = t->res; + lisp0->next_protocol = t->next_protocol; + lisp0->iid = clib_host_to_net_u32 (t->iid); + + t->rewrite = rw; + return (0); +} + +int vnet_lisp_gpe_add_del_tunnel +(vnet_lisp_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp) +{ + lisp_gpe_main_t * ngm = &lisp_gpe_main; + lisp_gpe_tunnel_t *t = 0; + vnet_main_t * vnm = ngm->vnet_main; + vnet_hw_interface_t * hi; + uword * p; + u32 hw_if_index = ~0; + u32 sw_if_index = ~0; + int rv; + lisp_gpe_tunnel_key_t key, *key_copy; + hash_pair_t *hp; + + key.src = a->src.as_u32; + key.iid = clib_host_to_net_u32(a->iid); + + p = hash_get_mem (ngm->lisp_gpe_tunnel_by_key, &key); + + if (a->is_add) + { + /* adding a tunnel: tunnel must not already exist */ + if (p) + return VNET_API_ERROR_INVALID_VALUE; + + if (a->decap_next_index >= LISP_GPE_INPUT_N_NEXT) + return VNET_API_ERROR_INVALID_DECAP_NEXT; + + pool_get_aligned (ngm->tunnels, t, CLIB_CACHE_LINE_BYTES); + memset (t, 0, sizeof (*t)); + + /* copy from arg structure */ +#define _(x) t->x = a->x; + foreach_copy_field; +#undef _ + + rv = lisp_gpe_rewrite (t); + + if (rv) + { + pool_put (ngm->tunnels, t); + return rv; + } + + key_copy = clib_mem_alloc (sizeof (*key_copy)); + memcpy (key_copy, &key, sizeof (*key_copy)); + + hash_set_mem (ngm->lisp_gpe_tunnel_by_key, key_copy, + t - ngm->tunnels); + + if (vec_len (ngm->free_lisp_gpe_tunnel_hw_if_indices) > 0) + { + hw_if_index = ngm->free_lisp_gpe_tunnel_hw_if_indices + [vec_len (ngm->free_lisp_gpe_tunnel_hw_if_indices)-1]; + _vec_len (ngm->free_lisp_gpe_tunnel_hw_if_indices) -= 1; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = t - ngm->tunnels; + hi->hw_instance = hi->dev_instance; + } + else + { + hw_if_index = vnet_register_interface + (vnm, lisp_gpe_device_class.index, t - ngm->tunnels, + lisp_gpe_hw_class.index, t - ngm->tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->output_node_index = lisp_gpe_encap_node.index; + } + + t->hw_if_index = hw_if_index; + t->sw_if_index = sw_if_index = hi->sw_if_index; + + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + } + else + { + /* deleting a tunnel: tunnel must exist */ + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + t = pool_elt_at_index (ngm->tunnels, p[0]); + + vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */); + vec_add1 (ngm->free_lisp_gpe_tunnel_hw_if_indices, t->hw_if_index); + + hp = hash_get_pair (ngm->lisp_gpe_tunnel_by_key, &key); + key_copy = (void *)(hp->key); + hash_unset_mem (ngm->lisp_gpe_tunnel_by_key, &key); + clib_mem_free (key_copy); + + vec_free (t->rewrite); + pool_put (ngm->tunnels, t); + } + + if (sw_if_indexp) + *sw_if_indexp = sw_if_index; + + return 0; +} + +static u32 fib_index_from_fib_id (u32 fib_id) +{ + ip4_main_t * im = &ip4_main; + uword * p; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (!p) + return ~0; + + return p[0]; +} + +static uword unformat_decap_next (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg 
(*args, u32 *); + u32 tmp; + + if (unformat (input, "drop")) + *result = LISP_GPE_INPUT_NEXT_DROP; + else if (unformat (input, "ip4")) + *result = LISP_GPE_INPUT_NEXT_IP4_INPUT; + else if (unformat (input, "ip6")) + *result = LISP_GPE_INPUT_NEXT_IP6_INPUT; + else if (unformat (input, "ethernet")) + *result = LISP_GPE_INPUT_NEXT_IP6_INPUT; + else if (unformat (input, "lisp-gpe")) + *result = LISP_GPE_INPUT_NEXT_LISP_GPE_ENCAP; + else if (unformat (input, "%d", &tmp)) + *result = tmp; + else + return 0; + return 1; +} + +static clib_error_t * +lisp_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t src, dst; + u8 is_add = 1; + u8 src_set = 0; + u8 dst_set = 0; + u32 encap_fib_index = 0; + u32 decap_fib_index = 0; + u8 next_protocol = LISP_GPE_NEXT_PROTOCOL_IP4; + u32 decap_next_index = LISP_GPE_INPUT_NEXT_IP4_INPUT; + u8 flags = LISP_GPE_FLAGS_P; + u8 ver_res = 0; + u8 res = 0; + u32 iid = 0; + u8 iid_set = 0; + u32 tmp; + int rv; + vnet_lisp_gpe_add_del_tunnel_args_t _a, * a = &_a; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "src %U", + unformat_ip4_address, &src)) + src_set = 1; + else if (unformat (line_input, "dst %U", + unformat_ip4_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "encap-vrf-id %d", &tmp)) + { + encap_fib_index = fib_index_from_fib_id (tmp); + if (encap_fib_index == ~0) + return clib_error_return (0, "nonexistent encap fib id %d", tmp); + } + else if (unformat (line_input, "decap-vrf-id %d", &tmp)) + { + decap_fib_index = fib_index_from_fib_id (tmp); + if (decap_fib_index == ~0) + return clib_error_return (0, "nonexistent decap fib id %d", tmp); + } + else if (unformat (line_input, "decap-next %U", unformat_decap_next, + &decap_next_index)) + ; + else if (unformat(line_input, "next-ip4")) + next_protocol = 1; + else if (unformat(line_input, "next-ip6")) + next_protocol = 2; + else if (unformat(line_input, "next-ethernet")) + next_protocol = 3; + else if (unformat(line_input, "next-nsh")) + next_protocol = 4; + /* Allow the user to specify anything they want in the LISP hdr */ + else if (unformat (line_input, "ver_res %x", &tmp)) + ver_res = tmp; + else if (unformat (line_input, "res %x", &tmp)) + res = tmp; + else if (unformat (line_input, "flags %x", &tmp)) + flags = tmp; + else if (unformat (line_input, "n-bit")) + flags |= LISP_GPE_FLAGS_N; + else if (unformat (line_input, "l-bit")) + flags |= LISP_GPE_FLAGS_L; + else if (unformat (line_input, "e-bit")) + flags |= LISP_GPE_FLAGS_E; + else if (unformat (line_input, "v-bit")) + flags |= LISP_GPE_FLAGS_V; + else if (unformat (line_input, "i-bit")) + flags |= LISP_GPE_FLAGS_V; + else if (unformat (line_input, "not-p-bit")) + flags &= ~LISP_GPE_FLAGS_P; + else if (unformat (line_input, "p-bit")) + flags |= LISP_GPE_FLAGS_P; + else if (unformat (line_input, "o-bit")) + flags |= LISP_GPE_FLAGS_O; + else if (unformat (line_input, "iidx %x", &iid)) + iid_set = 1; + else if (unformat (line_input, "iid %d", &iid)) + iid_set = 1; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (src_set == 0) + return clib_error_return (0, "tunnel src address not specified"); + + if (dst_set == 
0) + return clib_error_return (0, "tunnel dst address not specified"); + + if (iid_set == 0) + return clib_error_return (0, "iid not specified"); + + memset (a, 0, sizeof (*a)); + + a->is_add = is_add; + +#define _(x) a->x = x; + foreach_copy_field; +#undef _ + + rv = vnet_lisp_gpe_add_del_tunnel (a, 0 /* hw_if_indexp */); + + switch(rv) + { + case 0: + break; + case VNET_API_ERROR_INVALID_DECAP_NEXT: + return clib_error_return (0, "invalid decap-next..."); + + case VNET_API_ERROR_TUNNEL_EXIST: + return clib_error_return (0, "tunnel already exists..."); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel does not exist..."); + + default: + return clib_error_return + (0, "vnet_lisp_gpe_add_del_tunnel returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (create_lisp_gpe_tunnel_command, static) = { + .path = "lisp gpe tunnel", + .short_help = + "lisp gpe tunnel src <ip4-addr> dst <ip4-addr> iidx <0xnn> | iid <nn>\n" + " [encap-fib-id <nn>] [decap-fib-id <nn>]\n" + " [n-bit][l-bit][e-bit][v-bit][i-bit][p-bit][not-p-bit][o-bit]\n" + " [next-ip4][next-ip6][next-ethernet][next-nsh]\n" + " [decap-next [ip4|ip6|ethernet|nsh-encap|<nn>]][del]\n", + .function = lisp_gpe_add_del_tunnel_command_fn, +}; + +static clib_error_t * +show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + lisp_gpe_main_t * ngm = &lisp_gpe_main; + lisp_gpe_tunnel_t * t; + + if (pool_elts (ngm->tunnels) == 0) + vlib_cli_output (vm, "No lisp-gpe tunnels configured..."); + + pool_foreach (t, ngm->tunnels, + ({ + vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, t); + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) = { + .path = "show lisp gpe tunnel", + .function = show_lisp_gpe_tunnel_command_fn, +}; + +clib_error_t *lisp_gpe_init (vlib_main_t *vm) +{ + lisp_gpe_main_t *ngm = &lisp_gpe_main; + + ngm->vnet_main = vnet_get_main(); + ngm->vlib_main = vm; + + ngm->lisp_gpe_tunnel_by_key + = hash_create_mem (0, sizeof(lisp_gpe_tunnel_key_t), sizeof (uword)); + + udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe, + lisp_gpe_input_node.index, 1 /* is_ip4 */); + return 0; +} + +VLIB_INIT_FUNCTION(lisp_gpe_init); + diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.h b/vnet/vnet/lisp-gpe/lisp_gpe.h new file mode 100644 index 00000000000..8ca721dda84 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_lisp_gpe_h +#define included_vnet_lisp_gpe_h + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/l2/l2_input.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/lisp-gpe/lisp_gpe_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/udp.h> + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; /* 20 bytes */ + udp_header_t udp; /* 8 bytes */ + lisp_gpe_header_t lisp; /* 8 bytes */ +}) ip4_udp_lisp_gpe_header_t; + +typedef CLIB_PACKED(struct { + /* + * Key fields: ip src, LISP iid, ??? $$$$$$$$$ correct answer ??? + * all fields in NET byte order + */ + union { + struct { + u32 src; + u32 iid; + }; + u64 as_u64[1]; + }; +}) lisp_gpe_tunnel_key_t; + +typedef struct { + /* Rewrite string. $$$$ embed vnet_rewrite header */ + u8 * rewrite; + + /* decap next index */ + u32 decap_next_index; + + /* tunnel src and dst addresses */ + ip4_address_t src; + ip4_address_t dst; + + /* FIB indices */ + u32 encap_fib_index; /* tunnel partner lookup here */ + u32 decap_fib_index; /* inner IP lookup here */ + + /* vnet intfc hw/sw_if_index */ + u32 hw_if_index; + u32 sw_if_index; + + /* LISP header fields in HOST byte order */ + u8 flags; + u8 ver_res; + u8 res; + u8 next_protocol; + u32 iid; +} lisp_gpe_tunnel_t; + +#define foreach_lisp_gpe_input_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") \ +_(ETHERNET_INPUT, "ethernet-input") \ +_(LISP_GPE_ENCAP, "lisp-gpe-encap") + +typedef enum { +#define _(s,n) LISP_GPE_INPUT_NEXT_##s, + foreach_lisp_gpe_input_next +#undef _ + LISP_GPE_INPUT_N_NEXT, +} lisp_gpe_input_next_t; + +typedef enum { +#define lisp_gpe_error(n,s) LISP_GPE_ERROR_##n, +#include <vnet/lisp-gpe/lisp_gpe_error.def> +#undef lisp_gpe_error + LISP_GPE_N_ERROR, +} lisp_gpe_input_error_t; + +typedef struct { + /* vector of encap tunnel instances */ + lisp_gpe_tunnel_t *tunnels; + + /* lookup tunnel by key */ + uword * lisp_gpe_tunnel_by_key; + + /* Free vlib hw_if_indices */ + u32 * free_lisp_gpe_tunnel_hw_if_indices; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} lisp_gpe_main_t; + +lisp_gpe_main_t lisp_gpe_main; + +vlib_node_registration_t lisp_gpe_input_node; +vlib_node_registration_t lisp_gpe_encap_node; + +u8 * format_lisp_gpe_encap_trace (u8 * s, va_list * args); +u8 * format_lisp_gpe_header_with_length (u8 * s, va_list * args); + +typedef struct { + u8 is_add; + ip4_address_t src, dst; + u32 encap_fib_index; + u32 decap_fib_index; + u32 decap_next_index; + u8 flags; + u8 ver_res; + u8 res; + u8 next_protocol; + u32 iid; /* host byte order */ +} vnet_lisp_gpe_add_del_tunnel_args_t; + +int vnet_lisp_gpe_add_del_tunnel +(vnet_lisp_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp); + +u8 * format_lisp_gpe_header_with_length (u8 * s, va_list * args); + +#endif /* included_vnet_lisp_gpe_h */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_error.def b/vnet/vnet/lisp-gpe/lisp_gpe_error.def new file mode 100644 index 00000000000..6ef894f474d --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_error.def @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +lisp_gpe_error (DECAPSULATED, "good packets decapsulated") +lisp_gpe_error (NO_SUCH_TUNNEL, "no such tunnel packets") diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_packet.h b/vnet/vnet/lisp-gpe/lisp_gpe_packet.h new file mode 100644 index 00000000000..b3d96ed9d44 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_packet.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_lisp_gpe_packet_h +#define included_lisp_gpe_packet_h + +/* + * From draft-lewis-lisp-gpe-02.txt + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |N|L|E|V|I|P|R|O|Ver| Reserved | Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Instance ID/Locator-Status-Bits | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * N: The N-bit is the nonce-present bit. When this bit is set to 1, + * the low-order 24 bits of the first 32 bits of the LISP header + * contain a Nonce. See Section 6.3.1 for details. Both N- and + * V-bits MUST NOT be set in the same packet. If they are, a + * decapsulating ETR MUST treat the 'Nonce/Map-Version' field as + * having a Nonce value present. + * + * L: The L-bit is the 'Locator-Status-Bits' field enabled bit. When + * this bit is set to 1, the Locator-Status-Bits in the second + * 32 bits of the LISP header are in use. + * + * E: The E-bit is the echo-nonce-request bit. This bit MUST be ignored + * and has no meaning when the N-bit is set to 0. When the N-bit is + * set to 1 and this bit is set to 1, an ITR is requesting that the + * nonce value in the 'Nonce' field be echoed back in LISP- + * encapsulated packets when the ITR is also an ETR. See + * Section 6.3.1 for details. + * + * V: The V-bit is the Map-Version present bit. When this bit is set to + * 1, the N-bit MUST be 0. Refer to Section 6.6.3 for more details. + * + * I: The I-bit is the Instance ID bit. See Section 5.5 for more + * details. When this bit is set to 1, the 'Locator-Status-Bits' + * field is reduced to 8 bits and the high-order 24 bits are used as + * an Instance ID. If the L-bit is set to 0, then the low-order + * 8 bits are transmitted as zero and ignored on receipt. + * + * P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit + * MUST be set to 1 to indicate the presence of the 8 bit next + * protocol field. + * + * P = 0 indicates that the payload MUST conform to LISP as defined + * in [RFC6830]. 
+ * + * Flag bit 5 was chosen as the P bit because this flag bit is + * currently unallocated in LISP [RFC6830]. + * + * O: Flag bit 7 is defined as the O bit. When the O bit is set to 1, the + * packet is an OAM packet and OAM processing MUST occur. The OAM + * protocol details are out of scope for this document. As with the + * P-bit, bit 7 is currently a reserved flag in [RFC6830]. + * + * Next Protocol Field: The lower 8 bits of the first word are used to + * carry a next protocol. This next protocol field contains the + * protocol of the encapsulated payload packet. + * + * LISP [RFC6830] uses the lower 16 bits of the first word for either + * a nonce, an echo-nonce ([RFC6830]) or to support map-versioning + * ([RFC6834]). These are all optional capabilities that are + * indicated by setting the N, E, and the V bit respectively. + * + * To maintain the desired data plane compatibility, when the P bit + * is set, the N, E, and V bits MUST be set to zero. + * + * A new protocol registry will be requested from IANA for the Next + * Protocol field. This draft defines the following Next Protocol + * values: + * + * 0x1 : IPv4 + * 0x2 : IPv6 + * 0x3 : Ethernet + * 0x4: Network Service Header + */ + +typedef struct { + u8 flags; + u8 ver_res; + u8 res; + u8 next_protocol; + u32 iid; +} lisp_gpe_header_t; + +#define foreach_lisp_gpe_flag_bit \ +_(N, 0x80) \ +_(L, 0x40) \ +_(E, 0x20) \ +_(V, 0x10) \ +_(I, 0x08) \ +_(P, 0x04) \ +_(O, 0x01) + +typedef enum { +#define _(n,v) LISP_GPE_FLAGS_##n = v, +foreach_lisp_gpe_flag_bit +#undef _ +} vnet_lisp_gpe_flag_bit_t; + +#define LISP_GPE_VERSION 0x0 + +#define LISP_GPE_NEXT_PROTOCOL_IP4 0x1 +#define LISP_GPE_NEXT_PROTOCOL_IP6 0x2 +#define LISP_GPE_NEXT_PROTOCOL_ETHERNET 0x3 +#define LISP_GPE_NEXT_PROTOCOL_NSH 0x4 + +#endif /* included_lisp_gpe_packet_h */ diff --git a/vnet/vnet/lisp-gpe/rfc.txt b/vnet/vnet/lisp-gpe/rfc.txt new file mode 100644 index 00000000000..5e3da150c70 --- /dev/null +++ b/vnet/vnet/lisp-gpe/rfc.txt @@ -0,0 +1,826 @@ +Network Working Group D. Lewis +Internet-Draft Cisco Systems, Inc. +Intended status: Informational P. Agarwal +Expires: January 5, 2015 Broadcom + L. Kreeger + F. Maino + P. Quinn + M. Smith + N. Yadav + Cisco Systems, Inc. + July 4, 2014 + + + LISP Generic Protocol Extension + draft-lewis-lisp-gpe-02.txt + +Abstract + + This draft describes extending the Locator/ID Separation Protocol + (LISP) [RFC6830], via changes to the LISP header, with three new + capabilities: support for multi-protocol encapsulation, operations, + administration and management (OAM) signaling, and explicit + versioning. + +Status of this Memo + + This Internet-Draft is submitted in full conformance with the + provisions of BCP 78 and BCP 79. + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF). Note that other groups may also distribute + working documents as Internet-Drafts. The list of current Internet- + Drafts is at http://datatracker.ietf.org/drafts/current/. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as "work in progress." + + This Internet-Draft will expire on January 5, 2015. + +Copyright Notice + + Copyright (c) 2014 IETF Trust and the persons identified as the + document authors. All rights reserved. 
+ + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + + + +Lewis, et al. Expires January 5, 2015 [Page 1] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 3 + 2. LISP Header Without Protocol Extensions . . . . . . . . . . . 4 + 3. Generic Protocol Extension for LISP (LISP-gpe) . . . . . . . . 5 + 3.1. Multi Protocol Support . . . . . . . . . . . . . . . . . . 5 + 3.2. OAM Support . . . . . . . . . . . . . . . . . . . . . . . 6 + 3.3. Version Bits . . . . . . . . . . . . . . . . . . . . . . . 6 + 4. Backward Compatibility . . . . . . . . . . . . . . . . . . . . 8 + 4.1. LISP-gpe Routers to (legacy) LISP Routers . . . . . . . . 8 + 4.2. (legacy) LISP Routers to LISP-gpe Routers . . . . . . . . 8 + 4.3. Type of Service . . . . . . . . . . . . . . . . . . . . . 8 + 4.4. VLAN Identifier (VID) . . . . . . . . . . . . . . . . . . 8 + 5. LISP-gpe Examples . . . . . . . . . . . . . . . . . . . . . . 9 + 6. Security Considerations . . . . . . . . . . . . . . . . . . . 11 + 7. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12 + 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 13 + 9. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14 + 9.1. Normative References . . . . . . . . . . . . . . . . . . . 14 + 9.2. Informative References . . . . . . . . . . . . . . . . . . 14 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15 + + + + + + + + + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 2] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +1. Introduction + + LISP [RFC6830] defines an encapsulation format that carries IPv4 or + IPv6 (henceforth referred to as IP) packets in a LISP header and + outer UDP/IP transport. + + The LISP header does not specify the protocol being encapsulated and + therefore is currently limited to encapsulating only IP packet + payloads. Other protocols, most notably VXLAN [VXLAN] (which defines + a similar header format to LISP), are used to encapsulate L2 + protocols such as Ethernet. LISP [RFC6830] can be extended to + indicate the inner protocol, enabling the encapsulation of Ethernet, + IP or any other desired protocol all the while ensuring compatibility + with existing LISP [RFC6830] deployments. + + As LISP is deployed, there's also the need to provide increased + visibility and diagnostic capabilities within the overlay. + + This document describes extending LISP ([RFC6830]) via the following + changes: + + Next Protocol Bit (P bit): A reserved flag bit is allocated, and set + in the LISP-gpe header to indicate that a next protocol field is + present. + + OAM Flag Bit (O bit): A reserved flag bit is allocated, and set in + the LISP-gpe header, to indicate that the packet is an OAM packet. + + Version: Two reserved bits are allocated, and set in the LISP-gpe + header, to indicate LISP-gpe protocol version. 
+ + Next protocol: An 8 bit next protocol field is present in the LISP- + gpe header. + + + + + + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 3] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +2. LISP Header Without Protocol Extensions + + As described in the introduction, the LISP header has no protocol + identifier that indicates the type of payload being carried by LISP. + Because of this, LISP is limited to an IP payload. Furthermore, the + LISP header has no mechanism to signal OAM packets. + + The LISP header contains flags (some defined, some reserved), a + Nonce/Map-version field and an instance ID/Locator-status-bit field. + The flags provide flexibility to define how the reserved bits can be + used to change the definition of the LISP header. + + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N|L|E|V|I|flags| Nonce/Map-Version | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Instance ID/Locator-Status-Bits | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + Figure 1: LISP Header + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 4] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +3. Generic Protocol Extension for LISP (LISP-gpe) + +3.1. Multi Protocol Support + + This draft defines the following changes to the LISP header in order + to support multi-protocol encapsulation. + + P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit + MUST be set to 1 to indicate the presence of the 8 bit next + protocol field. + + P = 0 indicates that the payload MUST conform to LISP as defined + in [RFC6830]. + + Flag bit 5 was chosen as the P bit because this flag bit is + currently unallocated in LISP [RFC6830]. + + Next Protocol Field: The lower 8 bits of the first word are used to + carry a next protocol. This next protocol field contains the + protocol of the encapsulated payload packet. + + LISP [RFC6830] uses the lower 16 bits of the first word for either + a nonce, an echo-nonce ([RFC6830]) or to support map-versioning + ([RFC6834]). These are all optional capabilities that are + indicated by setting the N, E, and the V bit respectively. + + To maintain the desired data plane compatibility, when the P bit + is set, the N, E, and V bits MUST be set to zero. + + A new protocol registry will be requested from IANA for the Next + Protocol field. This draft defines the following Next Protocol + values: + + 0x1 : IPv4 + + 0x2 : IPv6 + + 0x3 : Ethernet + + 0x4: Network Service Header + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 5] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N|L|E|V|I|P|R|R| Reserved | Next Protocol | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Instance ID/Locator-Status-Bits | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 2: LISP-gpe Next Protocol (P=1) + +3.2. OAM Support + + Flag bit 7 is defined as the O bit. When the O bit is set to 1, the + packet is an OAM packet and OAM processing MUST occur. The OAM + protocol details are out of scope for this document. As with the + P-bit, bit 7 is currently a reserved flag in [RFC6830]. 
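
   As a worked illustration of the P bit and Next Protocol machinery
   described above, here is a minimal C sketch (not part of this draft
   nor of the committed sources) of how an encapsulator might fill in
   the 8-byte LISP-gpe header for an IPv4 payload with P=1, using the
   lisp_gpe_header_t layout and flag constants that
   vnet/vnet/lisp-gpe/lisp_gpe_packet.h introduces elsewhere in this
   change; the function name is hypothetical, and the N, E and V bits
   are left at zero as required when P is set:

     /* Sketch only: write the 8-byte LISP-gpe header for an IPv4 payload. */
     #include <string.h>
     #include <vppinfra/byte_order.h>
     #include <vnet/lisp-gpe/lisp_gpe_packet.h>

     static void
     example_write_lisp_gpe_header (lisp_gpe_header_t * h, u32 iid_host_order)
     {
       memset (h, 0, sizeof (h[0]));        /* N, E, V and reserved bits zero */
       h->flags = LISP_GPE_FLAGS_P          /* next-protocol field present */
                | LISP_GPE_FLAGS_I;         /* Instance ID field in use */
       h->ver_res = 0;                      /* Ver = 0 for this draft */
       h->next_protocol = LISP_GPE_NEXT_PROTOCOL_IP4;
       h->iid = clib_host_to_net_u32 (iid_host_order);  /* network order on the wire */
     }

   On the decap side, the lisp-gpe-input node added by this patch keys
   its tunnel lookup on the outer IPv4 source address together with
   this iid field.
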
+ + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N|L|E|V|I|P|R|O| Reserved | Next Protocol | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Instance ID/Locator-Status-Bits | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 3: LISP-gpe OAM bit (P=1) + +3.3. Version Bits + + LISP-gpe bits8 and 9 are defined as version bits. The version field + is used to ensure backward compatibility going forward with future + LISP-gpe updates. + + The initial version for LISP-gpe is 0. + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 6] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N|L|E|V|I|P|R|O|Ver| Reserved | Next Protocol | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Instance ID/Locator-Status-Bits | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 4: LISP-gpe Version bits (P=1) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 7] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +4. Backward Compatibility + + Undefined (in RFC6830) flag bits 5 and 7, LISP-gpe P and O bits, were + selected to ensure compatibility with existing LISP [RFC6830] + deployments. + + Similarly, using P = 0 to indicate that the format of the header and + payload conforms to [RFC6830] ensures compatibility with existing + LISP hardware forwarding platforms. + +4.1. LISP-gpe Routers to (legacy) LISP Routers + + A LISP-gpe router MUST not encapsulate non-IP packet nor OAM packets + to a LISP router. A method for determining the capabilities of a + LISP router (gpe or "legacy") is out of the scope of this draft. + + When encapsulating IP packets to a LISP router the P bit SHOULD be + set to 1 and the UDP port MUST be set to 4341. OAM bit MUST be set + to 0. The Next Protocol field SHOULD be 0x1 (IPv4) or 0x2 (IPv6). + The (legacy) LISP router will ignore the P bit and the protocol type + field. The (legacy) LISP router will treat the packet as a LISP + packet and inspect the first nibble of the payload to determine the + IP version. + + When the P bit is set, the N, E, and V bits MUST be set to zero. The + receiving (legacy) LISP router will ignore N, E and V bits, when the + P bit is set. + +4.2. (legacy) LISP Routers to LISP-gpe Routers + + When a LISP-gpe router receives a packet from a (legacy) LISP router, + the P bit MUST not be set and the UDP port MUST be 4341. The payload + MUST be IP, and the LISP-gpe router will inspect the first nibble of + the payload to determine IP version. + +4.3. Type of Service + + When a LISP-gpe router performs Ethernet encapsulation, the inner + 802.1Q [IEEE8021Q] priority code point (PCP) field MAY be mapped from + the encapsulated frame to the Type of Service field in the outer IPv4 + header, or in the case of IPv6 the 'Traffic Class' field. + +4.4. VLAN Identifier (VID) + + When a LISP-gpe router performs Ethernet encapsulation, the inner + header 802.1Q [IEEE8021Q] VLAN Identifier (VID) MAY be mapped to, or + used to determine the LISP Instance ID field. + + + + +Lewis, et al. Expires January 5, 2015 [Page 8] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +5. 
LISP-gpe Examples + + This section provides two examples of IP protocols, and one example + of Ethernet encapsulated LISP-gpe using the generic extension + described in this document. + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N|L|E|V|I|1|0|0|0| Reserved | NP = IPv4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Instance ID/Locator-Status-Bits | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Original IPv4 Packet | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 5: IPv4 and LISP-gpe + + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N|L|E|V|I|1|0|0|0| Reserved | NP = IPv6 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Instance ID/Locator-Status-Bits | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Original IPv6 Packet | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 6: IPv6 and LISP-gpe + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 9] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N|L|E|V|I|1|0|0|0| Reserved | NP = Ethernet | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Instance ID/Locator-Status-Bits | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Original Ethernet Frame | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 7: Ethernet and LISP-gpe + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 10] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +6. Security Considerations + + LISP-gpe security considerations are similar to the LISP security + considerations documented at length in LISP [RFC6830]. With LISP- + gpe, issues such as dataplane spoofing, flooding, and traffic + redirection are dependent on the particular protocol payload + encapsulated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 11] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +7. Acknowledgments + + A special thank you goes to Dino Farinacci for his guidance and + detailed review. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 12] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +8. IANA Considerations + + IANA is requested to set up a registry of "Next Protocol". These are + 8-bit values. Next Protocol values 0, 1, 2, 3 and 4 are defined in + this draft. New values are assigned via Standards Action [RFC5226]. 
+ + +---------------+-------------+---------------+ + | Next Protocol | Description | Reference | + +---------------+-------------+---------------+ + | 0 | Reserved | This document | + | | | | + | 1 | IPv4 | This document | + | | | | + | 2 | IPv6 | This document | + | | | | + | 3 | Ethernet | This document | + | | | | + | 4 | NSH | This document | + | | | | + | 5..253 | Unassigned | | + +---------------+-------------+---------------+ + + Table 1 + + There are ten bits at the beginning of the LISP-gpe header. New + bits are assigned via Standards Action [RFC5226]. + + Bits 0-3 - Assigned by LISP [RFC6830] + Bit 4 - Instance ID (I bit) + Bit 5 - Next Protocol (P bit) + Bit 6 - Reserved + Bit 7 - OAM (O bit) + Bits 8-9 - Version + + + + + + + + + + + + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 13] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +9. References + +9.1. Normative References + + [RFC0768] Postel, J., "User Datagram Protocol", STD 6, RFC 768, + August 1980. + + [RFC0791] Postel, J., "Internet Protocol", STD 5, RFC 791, + September 1981. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", BCP 26, RFC 5226, + May 2008. + +9.2. Informative References + + [ETYPES] The IEEE Registration Authority, "IEEE 802 Numbers", 2012, + <http://www.iana.org/assignments/ieee-802-numbers/ + ieee-802-numbers.xml>. + + [IEEE8021Q] + The IEEE Computer Society, "Media Access Control (MAC) + Bridges and Virtual Bridge Local Area Networks", August + 2012, <http://standards.ieee.org/getieee802/download/ + 802.1Q-2011.pdf>. + + [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700, + October 1994. + + [RFC6830] Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The + Locator/ID Separation Protocol (LISP)", RFC 6830, + January 2013. + + [RFC6834] Iannone, L., Saucez, D., and O. Bonaventure, "Locator/ID + Separation Protocol (LISP) Map-Versioning", RFC 6834, + January 2013. + + [VXLAN] Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger, + L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A + Framework for Overlaying Virtualized Layer 2 Networks over + Layer 3 Networks", 2013. + + + + + + + +Lewis, et al. Expires January 5, 2015 [Page 14] + +Internet-Draft LISP Generic Protocol Extension July 2014 + + +Authors' Addresses + + Darrel Lewis + Cisco Systems, Inc. + + Email: darlewis@cisco.com + + + Puneet Agarwal + Broadcom + + Email: pagarwal@broadcom.com + + + Larry Kreeger + Cisco Systems, Inc. + + Email: kreeger@cisco.com + + + Fabio Maino + Cisco Systems, Inc. + + Email: fmaino@cisco.com + + + Paul Quinn + Cisco Systems, Inc. + + Email: paulq@cisco.com + + + Michael Smith + Cisco Systems, Inc. + + Email: michsmit@cisco.com + + + Navindra Yadav + Cisco Systems, Inc. + + Email: nyadav@cisco.com diff --git a/vnet/vnet/llc/llc.c b/vnet/vnet/llc/llc.c new file mode 100644 index 00000000000..f3b464f1ace --- /dev/null +++ b/vnet/vnet/llc/llc.c @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * llc.c: llc support + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/llc/llc.h> + +/* Global main structure. */ +llc_main_t llc_main; + +u8 * format_llc_protocol (u8 * s, va_list * args) +{ + llc_protocol_t p = va_arg (*args, u32); + llc_main_t * pm = &llc_main; + llc_protocol_info_t * pi = llc_get_protocol_info (pm, p); + + if (pi) + s = format (s, "%s", pi->name); + else + s = format (s, "0x%02x", p); + + return s; +} + +u8 * format_llc_header_with_length (u8 * s, va_list * args) +{ + llc_main_t * pm = &llc_main; + llc_header_t * h = va_arg (*args, llc_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + llc_protocol_t p = h->dst_sap; + uword indent, header_bytes; + + header_bytes = llc_header_length (h); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "llc header truncated"); + + indent = format_get_indent (s); + + s = format (s, "LLC %U -> %U", + format_llc_protocol, h->src_sap, + format_llc_protocol, h->dst_sap); + + if (h->control != 0x03) + s = format (s, ", control 0x%x", llc_header_get_control (h)); + + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + { + llc_protocol_info_t * pi = llc_get_protocol_info (pm, p); + vlib_node_t * node = vlib_get_node (pm->vlib_main, pi->node_index); + if (node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) (h + 1), + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * format_llc_header (u8 * s, va_list * args) +{ + llc_header_t * h = va_arg (*args, llc_header_t *); + return format (s, "%U", format_llc_header_with_length, h, 0); +} + +/* Returns llc protocol as an int in host byte order. */ +uword +unformat_llc_protocol (unformat_input_t * input, va_list * args) +{ + u8 * result = va_arg (*args, u8 *); + llc_main_t * pm = &llc_main; + int p, i; + + /* Numeric type. 
*/ + if (unformat (input, "0x%x", &p) + || unformat (input, "%d", &p)) + { + if (p >= (1 << 8)) + return 0; + *result = p; + return 1; + } + + /* Named type. */ + if (unformat_user (input, unformat_vlib_number_by_name, + pm->protocol_info_by_name, &i)) + { + llc_protocol_info_t * pi = vec_elt_at_index (pm->protocol_infos, i); + *result = pi->protocol; + return 1; + } + + return 0; +} + +uword +unformat_llc_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + llc_header_t _h, * h = &_h; + u8 p; + + if (! unformat (input, "%U", unformat_llc_protocol, &p)) + return 0; + + h->src_sap = h->dst_sap = p; + h->control = 0x3; + + /* Add header to result. */ + { + void * p; + u32 n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, n_bytes); + memcpy (p, h, n_bytes); + } + + return 1; +} + +static uword llc_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + llc_header_t * h = rewrite; + llc_protocol_t protocol; + + if (max_rewrite_bytes < sizeof (h[0])) + return 0; + + switch (l3_type) { +#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = LLC_PROTOCOL_##b; break + _ (IP4, ip4); +#undef _ + default: + return 0; + } + + h->src_sap = h->dst_sap = protocol; + h->control = 0x3; + + return sizeof (h[0]); +} + +VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = { + .name = "LLC", + .format_header = format_llc_header_with_length, + .unformat_header = unformat_llc_header, + .set_rewrite = llc_set_rewrite, +}; + +static void add_protocol (llc_main_t * pm, + llc_protocol_t protocol, + char * protocol_name) +{ + llc_protocol_info_t * pi; + u32 i; + + vec_add2 (pm->protocol_infos, pi, 1); + i = pi - pm->protocol_infos; + + pi->name = protocol_name; + pi->protocol = protocol; + pi->next_index = pi->node_index = ~0; + + hash_set (pm->protocol_info_by_protocol, protocol, i); + hash_set_mem (pm->protocol_info_by_name, pi->name, i); +} + +static clib_error_t * llc_init (vlib_main_t * vm) +{ + clib_error_t * error; + llc_main_t * pm = &llc_main; + + memset (pm, 0, sizeof (pm[0])); + pm->vlib_main = vm; + + pm->protocol_info_by_name = hash_create_string (0, sizeof (uword)); + pm->protocol_info_by_protocol = hash_create (0, sizeof (uword)); + +#define _(f,n) add_protocol (pm, LLC_PROTOCOL_##f, #f); + foreach_llc_protocol; +#undef _ + + if ((error = vlib_call_init_function (vm, snap_init))) + return error; + + return vlib_call_init_function (vm, llc_input_init); +} + +VLIB_INIT_FUNCTION (llc_init); + diff --git a/vnet/vnet/llc/llc.h b/vnet/vnet/llc/llc.h new file mode 100644 index 00000000000..80131be5b03 --- /dev/null +++ b/vnet/vnet/llc/llc.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * llc.h: LLC definitions + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_llc_h +#define included_llc_h + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> + +/* Protocol (SSAP/DSAP) types. */ +#define foreach_llc_protocol \ + _ (null, 0x0) \ + _ (sublayer, 0x2) \ + _ (sna_path_control, 0x4) \ + _ (ip4, 0x6) \ + _ (sna1, 0x8) \ + _ (sna2, 0xc) \ + _ (sna3, 0x40) \ + _ (proway_lan, 0x0e) \ + _ (netware1, 0x10) \ + _ (netware2, 0xe0) \ + _ (osi_layer1, 0x14) \ + _ (osi_layer2, 0x20) \ + _ (osi_layer3, 0x34) \ + _ (osi_layer4, 0x54) \ + _ (osi_layer5, 0xfe) \ + _ (bpdu, 0x42) \ + _ (arp, 0x98) \ + _ (snap, 0xaa) \ + _ (vines1, 0xba) \ + _ (vines2, 0xbc) \ + _ (netbios, 0xf0) \ + _ (global_dsap, 0xff) + +typedef enum { +#define _(f,n) LLC_PROTOCOL_##f = n, + foreach_llc_protocol +#undef _ +} llc_protocol_t; + +typedef struct { +#define LLC_DST_SAP_IS_GROUP (1 << 0) +#define LLC_SRC_SAP_IS_RESPONSE (1 << 0) + u8 dst_sap, src_sap; + + /* Control byte. + [0] 1 => supervisory 0 => information + [1] unnumbered frame. */ + u8 control; + + /* Only present if (control & 3) != 3. */ + u8 extended_control[0]; +} llc_header_t; + +always_inline u16 +llc_header_get_control (llc_header_t * h) +{ + u16 r = h->control; + return r | ((((r & 3) != 3) ? h->extended_control[0] : 0) << 8); +} + +always_inline u8 +llc_header_length (llc_header_t * h) +{ + return ((h->control & 3) != 3 ? 4 : 3); +} + +typedef struct { + /* Name (a c string). */ + char * name; + + /* LLC protocol (SAP type). */ + llc_protocol_t protocol; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} llc_protocol_info_t; + +#define foreach_llc_error \ + _ (NONE, "no error") \ + _ (UNKNOWN_PROTOCOL, "unknown llc ssap/dsap") \ + _ (UNKNOWN_CONTROL, "control != 0x3") + +typedef enum { +#define _(f,s) LLC_ERROR_##f, + foreach_llc_error +#undef _ + LLC_N_ERROR, +} llc_error_t; + +typedef struct { + vlib_main_t * vlib_main; + + llc_protocol_info_t * protocol_infos; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword * protocol_info_by_name, * protocol_info_by_protocol; + + /* llc-input next index indexed by protocol. */ + u8 input_next_by_protocol[256]; +} llc_main_t; + +always_inline llc_protocol_info_t * +llc_get_protocol_info (llc_main_t * m, llc_protocol_t protocol) +{ + uword * p = hash_get (m->protocol_info_by_protocol, protocol); + return p ? 
vec_elt_at_index (m->protocol_infos, p[0]) : 0; +} + +extern llc_main_t llc_main; + +/* Register given node index to take input for given llc type. */ +void +llc_register_input_protocol (vlib_main_t * vm, + llc_protocol_t protocol, + u32 node_index); + +void llc_set_adjacency (vnet_rewrite_header_t * rw, + uword max_data_bytes, + llc_protocol_t protocol); + +format_function_t format_llc_protocol; +format_function_t format_llc_header; +format_function_t format_llc_header_with_length; + +/* Parse llc protocol as 0xXXXX or protocol name. */ +unformat_function_t unformat_llc_protocol; + +/* Parse llc header. */ +unformat_function_t unformat_llc_header; +unformat_function_t unformat_pg_llc_header; + +always_inline void +llc_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + + n->format_buffer = format_llc_header_with_length; + n->unformat_buffer = unformat_llc_header; + pn->unformat_edit = unformat_pg_llc_header; +} + +#endif /* included_llc_h */ diff --git a/vnet/vnet/llc/node.c b/vnet/vnet/llc/node.c new file mode 100644 index 00000000000..1e54a53cc9c --- /dev/null +++ b/vnet/vnet/llc/node.c @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * llc_node.c: llc packet processing + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/llc/llc.h> + +#define foreach_llc_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") + +typedef enum { +#define _(s,n) LLC_INPUT_NEXT_##s, + foreach_llc_input_next +#undef _ + LLC_INPUT_N_NEXT, +} llc_input_next_t; + +typedef struct { + u8 packet_data[32]; +} llc_input_trace_t; + +static u8 * format_llc_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + llc_input_trace_t * t = va_arg (*va, llc_input_trace_t *); + + s = format (s, "%U", format_llc_header, t->packet_data); + + return s; +} + +static uword +llc_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + llc_main_t * lm = &llc_main; + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (llc_input_trace_t)); + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + llc_header_t * h0, * h1; + u8 next0, next1, len0, len1, enqueue_code; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * b2, * b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, LOAD); + vlib_prefetch_buffer_header (b3, LOAD); + + CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = (void *) (b0->data + b0->current_data); + h1 = (void *) (b1->data + b1->current_data); + + len0 = llc_header_length (h0); + len1 = llc_header_length (h1); + + b0->current_data += len0; + b1->current_data += len1; + + b0->current_length -= len0; + b1->current_length -= len1; + + next0 = lm->input_next_by_protocol[h0->dst_sap]; + next1 = lm->input_next_by_protocol[h1->dst_sap]; + + b0->error = node->errors[next0 == LLC_INPUT_NEXT_DROP ? LLC_ERROR_UNKNOWN_PROTOCOL : LLC_ERROR_NONE]; + b1->error = node->errors[next1 == LLC_INPUT_NEXT_DROP ? 
LLC_ERROR_UNKNOWN_PROTOCOL : LLC_ERROR_NONE]; + + enqueue_code = (next0 != next_index) + 2*(next1 != next_index); + + if (PREDICT_FALSE (enqueue_code != 0)) + { + switch (enqueue_code) + { + case 1: + /* A B A */ + to_next[-2] = bi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, bi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, bi1); + break; + + case 3: + /* A B B or A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, bi0); + vlib_set_next_frame_buffer (vm, node, next1, bi1); + if (next0 == next1) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + next_index = next1; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + llc_header_t * h0; + u8 next0, len0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = (void *) (b0->data + b0->current_data); + + len0 = llc_header_length (h0); + + b0->current_data += len0; + + b0->current_length -= len0; + + next0 = lm->input_next_by_protocol[h0->dst_sap]; + + b0->error = node->errors[next0 == LLC_INPUT_NEXT_DROP ? LLC_ERROR_UNKNOWN_PROTOCOL : LLC_ERROR_NONE]; + + /* Sent packet to wrong next? */ + if (PREDICT_FALSE (next0 != next_index)) + { + /* Return old frame; remove incorrectly enqueued packet. */ + vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); + + /* Send to correct next. */ + next_index = next0; + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static char * llc_error_strings[] = { +#define _(f,s) s, + foreach_llc_error +#undef _ +}; + +VLIB_REGISTER_NODE (llc_input_node) = { + .function = llc_input, + .name = "llc-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = LLC_N_ERROR, + .error_strings = llc_error_strings, + + .n_next_nodes = LLC_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [LLC_INPUT_NEXT_##s] = n, + foreach_llc_input_next +#undef _ + }, + + .format_buffer = format_llc_header_with_length, + .format_trace = format_llc_input_trace, + .unformat_buffer = unformat_llc_header, +}; + +static clib_error_t * llc_input_init (vlib_main_t * vm) +{ + llc_main_t * lm = &llc_main; + + { + clib_error_t * error = vlib_call_init_function (vm, llc_init); + if (error) + clib_error_report (error); + } + + llc_setup_node (vm, llc_input_node.index); + + { + int i; + for (i = 0; i < ARRAY_LEN (lm->input_next_by_protocol); i++) + lm->input_next_by_protocol[i] = LLC_INPUT_NEXT_DROP; + } + + return 0; +} + +VLIB_INIT_FUNCTION (llc_input_init); + +void +llc_register_input_protocol (vlib_main_t * vm, + llc_protocol_t protocol, + u32 node_index) +{ + llc_main_t * lm = &llc_main; + llc_protocol_info_t * pi; + + { + clib_error_t * error = vlib_call_init_function (vm, llc_input_init); + if (error) + clib_error_report (error); + } + + pi = llc_get_protocol_info (lm, protocol); + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + llc_input_node.index, + node_index); + + lm->input_next_by_protocol[protocol] = pi->next_index; +} diff --git a/vnet/vnet/llc/pg.c b/vnet/vnet/llc/pg.c new file mode 100644 index 00000000000..eb6c6a18b27 --- /dev/null +++ b/vnet/vnet/llc/pg.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * llc_pg.c: packet generator llc interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/llc/llc.h> + +typedef struct { + pg_edit_t dst_sap; + pg_edit_t src_sap; + pg_edit_t control; +} pg_llc_header_t; + +static inline void +pg_llc_header_init (pg_llc_header_t * e) +{ + pg_edit_init (&e->dst_sap, llc_header_t, dst_sap); + pg_edit_init (&e->src_sap, llc_header_t, src_sap); + pg_edit_init (&e->control, llc_header_t, control); +} + +uword +unformat_pg_llc_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_llc_header_t * h; + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (llc_header_t), + &group_index); + pg_llc_header_init (h); + + pg_edit_set_fixed (&h->control, 0x03); + + error = 1; + if (! unformat (input, "%U -> %U", + unformat_pg_edit, + unformat_llc_protocol, &h->src_sap, &h->dst_sap)) + goto done; + + { + llc_main_t * pm = &llc_main; + llc_protocol_info_t * pi = 0; + pg_node_t * pg_node = 0; + + if (h->dst_sap.type == PG_EDIT_FIXED) + { + u8 t = *h->dst_sap.values[PG_EDIT_LO]; + pi = llc_get_protocol_info (pm, t); + if (pi && pi->node_index != ~0) + pg_node = pg_get_node (pi->node_index); + } + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto done; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/map/examples/gen-rules.py b/vnet/vnet/map/examples/gen-rules.py new file mode 100755 index 00000000000..d6746f79af4 --- /dev/null +++ b/vnet/vnet/map/examples/gen-rules.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3.4 + +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
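# Usage sketch (not part of the original script): the value passed with -t is
# resolved with globals() at the bottom of this file, so it must be the name of
# one of the generator functions defined below, for example:
#
#   ./gen-rules.py -t shared11br
#   ./gen-rules.py -t ip4
#
# Each mode prints VPP CLI configuration ("map add domain ...", "map add rule
# ..." or "ip route add ...") to stdout.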
+ +import ipaddress +import argparse +import sys + +# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6 +# map add rule index <0> psid <psid> ip6-dst <ip6-dst> + +parser = argparse.ArgumentParser(description='MAP VPP configuration generator') +parser.add_argument('-t', action="store", dest="mapmode") +args = parser.parse_args() + +# +# 1:1 Shared IPv4 address, shared BR, Terastream +# +def terastream(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/22') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + ip6_src = ipaddress.ip_address('cccc:bbbb::') + for i in range(ip4_pfx.num_addresses): + if not i % 64: + ip6_src = ip6_src + 1 + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src " + str(ip6_src) + + " ea-bits-len 0 psid-offset 0 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Shared IPv4 address, shared BR, OTE +# +def oteshared11(): + ip4_pfx = ipaddress.ip_network('2.84.63.0/24') + dst = list(ipaddress.ip_network('2a02:580:8c00::/40').subnets(new_prefix=56)) + psid_len = 6 + ip6_src = ipaddress.ip_address('2a02::') + for i in range(ip4_pfx.num_addresses): + if not i % 64: + ip6_src = ip6_src + 1 + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src " + str(ip6_src) + + " ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + enduserprefix = list(dst.pop(0).subnets(new_prefix=64))[255-1] + print("map add rule index", i, "psid", psid, "ip6-dst", enduserprefix[(i * (0x1<<psid_len)) + psid]) + + +# +# 1:1 Shared IPv4 address, shared BR, Terastream +# +def confdterastream(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/22') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + ip6_src = ipaddress.ip_address('cccc:bbbb::') + for i in range(ip4_pfx.num_addresses): + if not i % 64: + ip6_src = ip6_src + 1 + print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + str(ip6_src) + " ipv6-prefix ::/0" + " ipv4-prefix " + str(ip4_pfx[i]) + + "/32 ea-len 0 psid-offset 6 psid-len", psid_len) +# print("vpp softwire softwire-instances softwire-instance", i, "ipv4-pfx " + str(ip4_pfx[i]) + "/32 ipv6-pfx ::/0 br-ipv6 " + str(ip6_src) + +# " ea-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("binding", psid, "ipv6-addr", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +def shared11br_yang(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("vpp softwire softwire-instances softwire-instance " + str(i) + " ipv4-prefix " + str(ip4_pfx[i]) + "/32 " + + "ipv6-prefix ::/0 ea-len 0 psid-offset 6 tunnel-mtu 1234 psid-len", psid_len) + #print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + # "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + # print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + print("binding", psid, "ipv6-addr", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +def shared11br_xml(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/32') + ip6_dst = ipaddress.ip_network('bbbb::/32') + ip6_src = ipaddress.ip_address('cccc:bbbb::') + psid_len = 6 + print('<vpp xmlns="http://www.cisco.com/yang/cisco-vpp"><softwire><softwire-instances>'); + count = 1024; + for i 
in range(ip4_pfx.num_addresses): + if not i % 64: + ip6_src = ip6_src + 1 + if count == 0: + break; + count = count - 1; + print('<softwire-instance>') + print(' <id>'+ str(i)+ '</id>') + print(' <ipv4-prefix>'+ str(ip4_pfx[i])+ '/32</ipv4-prefix>') + print(' <ipv6-prefix>::/0</ipv6-prefix>') + print(' <ea-len>0</ea-len>') + print(' <psid-offset>0</psid-offset>') + print(' <psid-len>'+ str(psid_len) + '</psid-len>') + for psid in range(0x1 << psid_len): + print(' <binding>') + print(' <psid>', psid, '</psid>') + print(' <ipv6-addr>'+ str(ip6_dst[(i * (0x1<<psid_len)) + psid]) + '</ipv6-addr>') + print(' </binding>') + print('</softwire-instance>') + print('</softwire-instances></softwire>') + print('</vpp>') + +# +# 1:1 Shared IPv4 address, shared BR +# +def shared11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Shared IPv4 address, shared BR +# +def shared11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + + +# +# 1:1 Shared IPv4 address +# +def shared11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Shared IPv4 address small +# +def smallshared11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/24') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Full IPv4 address +# +def full11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 0 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 0 psid-len 0") +def full11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 0 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 0 psid-len 0") + 
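#
# Worked example (illustrative only, not called by the generators above): with
# the "psid-offset 6 psid-len 6" values used by the shared* modes, each PSID
# owns 2**(16 - 6 - 6) = 16 consecutive ports per range and 2**6 - 1 = 63
# usable ranges (the range holding the well-known ports is excluded), i.e.
# 63 * 16 = 1008 ports per shared IPv4 address.
#
def ports_per_psid_example(psid_offset=6, psid_len=6):
    port_bits = 16 - psid_offset - psid_len
    return (2 ** psid_offset - 1) * (2 ** port_bits)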
+# +# Algorithmic mapping Shared IPv4 address +# +def algo(): + print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8") + print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0") + +# +# IP4 forwarding +# +def ip4(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + for i in range(ip4_pfx.num_addresses): + print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2") + + +globals()[args.mapmode]() + + diff --git a/vnet/vnet/map/examples/map-test.py b/vnet/vnet/map/examples/map-test.py new file mode 100755 index 00000000000..01f377fb6ee --- /dev/null +++ b/vnet/vnet/map/examples/map-test.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys, time +from scapy.all import * + +import mapalgs + + +ifname = "vpp-tap" + +loc_v4_mac = "aa:aa:aa:aa:aa:a4" +loc_v6_mac = "aa:aa:aa:aa:aa:a6" +vpp_mac = "aa:aa:aa:aa:00:00" + +map_t = 1 + +fragsize = 0 +map_mtu = 200 + +def mac_to_vppmac(mac): + mac = mac.replace(':', '') + return mac[0:4]+"."+mac[4:8]+"."+mac[8:12] + + +map = mapalgs.MapCalc( rulev6 = 'bbbb::/32', + rulev4 = '20.0.0.0/24', + ratio = 256); + +dmr = mapalgs.DmrCalc('cccc:bbbb::/96') + + +ICMP_TYPES_CODES = { + 0: 0, + 3: 15, + 4: 0, + 5: 3, + 6: 0, + 8: 0, + 9: 0, + 10: 0, + 11: 1, + 12: 2, + 13: 0, + 14: 0, + 15: 0, + 16: 0, + 17: 0, + 18: 0 +} + +ICMP6_TYPES_CODES = { + 1: 7, + 2: 0, + 3: 1, + 4: 3, +} + +def net_conf(): + c = "" + c += "tap connect "+ifname+" hwaddr "+mac_to_vppmac(vpp_mac)+" \n" + c += "set int state tap-0 up \n" + c += "set ip6 neighbor tap-0 2001:f00d::1 "+mac_to_vppmac(loc_v6_mac)+" \n" + c += "set ip arp tap-0 10.0.0.1 "+mac_to_vppmac(loc_v4_mac)+" \n" + c += "ip route add ::/0 via 2001:f00d::1 tap-0 \n" + c += "ip route add 0.0.0.0/0 via 10.0.0.1 tap-0 \n" + return c + +def conf(): + c = net_conf() + c += "map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ea-bits-len 16 psid-offset 6 psid-len 8" + if map_mtu != 0: + c += " mtu "+str(map_mtu) + if map_t: + c += " ip6-src cccc:bbbb::/96 map-t" + else: + c += " ip6-src cccc:bbbb::ffff" + + c += "\n" + return c + +def send_packet(ip_header, ip_content): + print("Send packet") + if fragsize != 0: + if ip_header.version == 4: + frags = fragment(ip_header/ip_content, fragsize=fragsize) + for f in frags: + print("Fragmented IPv4 packet") + sendp(Ether(dst=vpp_mac, src=loc_v4_mac)/f, iface=ifname) + elif ip_header.version == 6: + frags = fragment6(ip_header/IPv6ExtHdrFragment()/ip_content, fragsize) + for f in frags: + print("Fragmented IPv6 packet") + sendp(Ether(dst=vpp_mac, src=loc_v6_mac)/f, iface=ifname) + else: + sendp(Ether(dst=vpp_mac)/ip_header/ip_content, iface=ifname) + +def send_packet_frag_inner(packet, inner_header, inner_content): + print("Send packet with inner ICMP packet") + if fragsize != 0: + if packet.version == 4: + frags = fragment(inner_header/inner_content, 
fragsize=fragsize) + for f in frags: + print("Fragmented IPv4 inner packet") + sendp(Ether(dst=vpp_mac, src=loc_v4_mac)/packet/f, iface=ifname) + elif packet.version == 6: + frags = fragment6(inner_header/IPv6ExtHdrFragment()/inner_content, fragsize) + for f in frags: + print("Fragmented IPv6 inner packet") + sendp(Ether(dst=vpp_mac, src=loc_v6_mac)/packet/f, iface=ifname) + else: + sendp(Ether(dst=vpp_mac)/packet/inner_header/inner_content, iface=ifname) + + +def sendv6udp(src, dst, port): + psid = map.gen_psid(port) + ceaddr = str(map.get_mapce_addr(src, psid)) + dst = str(dmr.embed_6052addr(dst)) + send_packet(IPv6(dst=dst, src=ceaddr), UDP(sport=port)/('X'*900)) + +def sendv6tcp(src, dst, port): + psid = map.gen_psid(port) + ceaddr = str(map.get_mapce_addr(src, psid)) + dst = str(dmr.embed_6052addr(dst)) + send_packet(IPv6(dst=dst, src=ceaddr), TCP(sport=port)/('X'*900)) + +def sendv4udp(src, dst, port): + send_packet(IP(dst=dst, src=src), UDP(dport=port)/('X'*900)) + +def sendv4tcp(src, dst, port): + send_packet(IP(dst=dst, src=src), TCP(dport=port)/('X'*900)) + +def sendv6ping(src, dst, id): + psid = map.gen_psid(id) + ceaddr = str(map.get_mapce_addr(src, psid)) + dst = str(dmr.embed_6052addr(dst)) + send_packet(IPv6(dst=dst, src=ceaddr), ICMPv6EchoRequest(id=id, data='A'*500)) + send_packet(IPv6(dst=dst, src=ceaddr), ICMPv6EchoReply(id=id, data='A'*500)) + +def sendv4ping(src, dst, id): + send_packet(IP(dst=dst, src=src), ICMP(id=id, type=0)/('X'*500)) + send_packet(IP(dst=dst, src=src), ICMP(id=id, type=8)/('X'*500)) + +def sendv4icmperr(src, dst, type, code, port, inner_src, inner_dst, payload_length): + inner = IP(dst=inner_dst, src=inner_src)/TCP(sport=port, dport=8888)/('X'*payload_length) + send_packet_frag_inner(IP(dst=dst, src=src)/ICMP(type=type, code=code), IP(dst=inner_dst, src=inner_src), TCP(sport=port, dport=8888)/('X'*payload_length)) + #send_packet(IP(dst=dst, src=src)/ICMP(type=type, code=code)/inner) + +def sendv6icmperr(src, dst, type, code, port, payload_length): + psid = map.gen_psid(port) + src = str(map.get_mapce_addr(src, psid)) + dst = str(dmr.embed_6052addr(dst)) + inner_header = IPv6(dst=src, src=dst) + inner_content = TCP(sport=8888, dport=port)/('X'*payload_length) + send_packet_frag_inner(IPv6(dst=dst, src=src)/ICMPv6DestUnreach(type=type, code=code), inner_header, inner_content) + #send_packet(IPv6(dst=dst, src=src)/ICMPv6DestUnreach(type=type, code=code)/inner) + +def sendv4icmp_errors(src, dst, port, inner_src, inner_dst, payload_length): + for type in ICMP_TYPES_CODES: + for code in range(0, ICMP_TYPES_CODES[type] + 1): + sendv4icmperr(src, dst, type, code, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, type, ICMP_TYPES_CODES[type] + 2, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, type, 255, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, 1, 0, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, 2, 10, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, 255, 255, port, inner_src, inner_dst, payload_length) + + #TODO: Check wrong paramater with different pointer values + +def sendv6icmp_errors(src, dst, port, payload_length): + for type in ICMP6_TYPES_CODES: + for code in range(0, ICMP6_TYPES_CODES[type] + 1): + sendv6icmperr(src, dst, type, code, port, payload_length) + #sendv6icmperr(src, dst, type, ICMP6_TYPES_CODES[type] + 2, port, payload_length) + #sendv6icmperr(src, dst, type, 255, port, payload_length) + + +def traffic(): + delay = 
2.0 + while 1: + #sendp(Ether(dst="bb:bb:bb:bb:bb:b4")/IP(dst="20.0.0.1")/UDP(chksum=0)/('X'*900), iface="vpp-tapv4") + #sendp(Ether(dst="bb:bb:bb:bb:bb:b6")/IPv6(dst="cccc:bbbb::a000:0001")/ICMPv6EchoRequest()/('X'*900), iface="vpp-tapv6") + #sendp(Ether(dst="bb:bb:bb:bb:bb:b6")/IPv6(dst="cccc:bbbb::a000:0001")/UDP()/('X'*900), iface="vpp-tapv6") + sendv6udp("20.0.0.1", "10.0.0.1", 12001) + sendv6tcp("20.0.0.1", "10.0.0.1", 12002) + sendv4udp("10.0.0.1", "20.0.0.1", 12003) + sendv4tcp("10.0.0.1", "20.0.0.1", 12004) + sendv6ping("20.0.0.1", "10.0.0.1", 12005) + sendv4ping("10.0.0.1", "20.0.0.1", 12006) + sendv4icmp_errors("10.0.0.1", "20.0.0.1", 12006, "20.0.0.1", "10.0.0.1", 500) + sendv4icmp_errors("10.0.0.1", "20.0.0.1", 12006, "20.0.0.1", "10.0.0.1", 1500) + sendv6icmp_errors("20.0.0.1", "10.0.0.1", 12006, 500) + time.sleep(delay) + delay *= 0.9 + +if len(sys.argv) <= 1: + print("Usage: conf|traffic") + exit(1) + +if sys.argv[1] == "conf": + print(conf()) +elif sys.argv[1] == "traffic": + traffic()
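# Assumed workflow (not part of the original script): "conf" prints the tap and
# MAP domain configuration for VPP to stdout; "traffic" then replays the
# MAP-E/MAP-T test flows above over the "vpp-tap" interface with scapy, which
# typically requires root privileges:
#
#   ./map-test.py conf
#   ./map-test.py traffic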
\ No newline at end of file diff --git a/vnet/vnet/map/examples/mapalgs.py b/vnet/vnet/map/examples/mapalgs.py new file mode 100644 index 00000000000..50a0ed0a3ee --- /dev/null +++ b/vnet/vnet/map/examples/mapalgs.py @@ -0,0 +1,327 @@ +#!/usr/bin/env python3 + +# The MIT License (MIT) +# +# Copyright (c) 2015 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File included from https://github.com/ejordangottlieb/pyswmap +# Thanks to jordan ;) +# - Pierre +# + +# There is still a great deal of work required on this module. Please +# use with caution. +# -Jordan + +import sys +from ipaddress import ( + IPv6Address, + IPv6Network, + ip_network, + ip_address, + ) +from math import ( + log, + ) + +class MapCalc(object): + + def __init__(self,**bmr): + #rulev6,rulev4): + self.portranges = False + + # Validate and set BMR and BMR derived values + self._check_bmr_values(bmr) + + def _check_bmr_values(self,bmr): + # Assume these values have not been supplied. Validate later. 
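        # Worked example (illustrative): with rulev4 20.0.0.0/24 and ratio 256,
        # _calc_ea() below yields psidbits = log2(256) = 8 and
        # ealen = 8 + (32 - 24) = 16, which matches the
        # "ea-bits-len 16 ... psid-len 8" domain configured by map-test.py.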
+ self.ealen = False + self.ratio = False + + # Validate that a proper PSID Offset has been set + if 'psidoffset' not in bmr: + # Set Default PSID Offset of 6 if it is not set + self.psidoffset = 6 + else: + self.psidoffset = self._psid_offset(bmr['psidoffset']) + + # Validate that a proper IPv4 rule prefix is defined + if 'rulev4' not in bmr: + print("The rule IPv4 prefix has not been set") + sys.exit(1) + else: + self.rulev4 = self._ipv4_rule(bmr['rulev4']) + + # Validate that a proper IPv6 rule prefix is defined + if 'rulev6' not in bmr: + print("The rule IPv6 prefix has not been set") + sys.exit(1) + else: + self.rulev6 = self._ipv6_rule(bmr['rulev6']) + + # Check if EA length was passed + if 'ealen' not in bmr: + self.ealen = False + else: + self.ealen = bmr['ealen'] + self.ratio = self._calc_ratio(bmr['ealen']) + + # Check if sharing ratio was passed or calculated by _calc_ratio + if 'ratio' not in bmr: + # Skip if we have already calculated ratio + if not (self.ratio): + self.ratio = False + else: + if (self.ealen): + # Check to see if supplied EA length contradicts supplied ratio + if ( bmr['ratio'] != self.ratio ): + eavalue = "EA value {}".format(self.ealen) + sharingratio = "sharing ratio {}".format(bmr['ratio']) + print("Supplied {} and {} are contradictory".format( + eavalue, + sharingratio) + ) + sys.exit(1) + else: + self.ratio = bmr['ratio'] + self.ealen = self._calc_ea(bmr['ratio']) + + # EA length or sharing ratio must be set + if not ( self.ealen or self.ratio): + print("The BMR must include an EA length or sharing ratio") + sys.exit(1) + + # Since we have not hit an exception we can calculate the port bits + self.portbits = self._calc_port_bits() + + def _ipv4_rule(self,rulev4): + try: + self.rulev4mask = ip_network( + rulev4, + strict=False + ).prefixlen + except ValueError: + print("Invalid IPv4 prefix {}".format(rulev4)) + sys.exit(1) + + self.rulev4object = ip_network(rulev4) + + return rulev4 + + def _ipv6_rule(self,rulev6): + try: + self.rulev6mask = IPv6Network( + rulev6, + strict=False + ).prefixlen + except ValueError: + print("Invalid IPv6 prefix {}".format(rulev6)) + sys.exit(1) + + return rulev6 + + def _psid_offset(self,psidoffset): + PSIDOFFSET_MAX = 6 + if psidoffset in range(0,PSIDOFFSET_MAX+1): + return psidoffset + else: + print("Invalid PSID Offset value: {}".format(psidoffset)) + sys.exit(1) + + def _psid_range(self,x): + rset = [] + for i in range(0,x+1): + rset.append(2**i) + return rset + + def _calc_port_bits(self): + portbits = 16 - self.psidoffset - self.psidbits + return portbits + + def _calc_ea(self,ratio): + if ratio not in ( self._psid_range(16) ): + print("Invalid ratio {}".format(ratio)) + print("Ratio between 2 to the power of 0 thru 16") + sys.exit(1) + + if ( 1 == ratio): + self.psidbits = 0 + else: + self.psidbits = int(log(ratio,2)) + ealen = self.psidbits + ( 32 - self.rulev4mask ) + return ealen + + def _calc_ratio(self,ealen): + maskbits = 32 - self.rulev4mask + if ( ealen < maskbits ): + print("EA of {} incompatible with rule IPv4 prefix {}".format( + ealen, + self.rulev4, + ) + ) + print("EA length must be at least {} bits".format( + maskbits, + ) + ) + sys.exit(1) + + self.psidbits = ealen - ( 32 - self.rulev4mask ) + if ( self.psidbits > 16): + print("EA length of {} is too large".format( + ealen, + ) + ) + print("EA should not exceed {} for rule IPv4 prefix {}".format( + maskbits + 16, + self.rulev4, + ) + ) + sys.exit(1) + ratio = 2**self.psidbits + return ratio + + def gen_psid(self,portnum): + if ( portnum < 
self.start_port() ): + print("port value is less than allowed by PSID Offset") + sys.exit(1) + psid = (portnum & ((2**self.psidbits - 1) << self.portbits)) + psid = psid >> self.portbits + return psid + + def port_ranges(self): + return 2**self.psidoffset - 1 + + def start_port(self): + if self.psidoffset == 0: return 0 + return 2**(16 - self.psidoffset) + + def port_list(self,psid): + startrange = psid * (2**self.portbits) + self.start_port() + increment = (2**self.psidbits) * (2**self.portbits) + portlist = [ ] + for port in range(startrange,startrange + 2**self.portbits): + if port >= 65536: continue + portlist.append(port) + for x in range(1,self.port_ranges()): + startrange += increment + for port in range(startrange,startrange + 2**self.portbits): + portlist.append(port) + return portlist + + def ipv4_index(self,ipv4addr): + if ip_address(ipv4addr) in ip_network(self.rulev4): + x = ip_address(ipv4addr) + y = ip_network(self.rulev4,strict=False).network_address + self.ipv4addr = x + return ( int(x) - int(y) ) + else: + print("Error: IPv4 address {} not in Rule IPv4 subnet {}".format( + ipv4add, + ip_network(self.rulev4,strict=False).network_address)) + sys.exit(1) + + def _calc_ipv6bit_pos(self): + addroffset = 128 - (self.rulev6mask + ( self.ealen - self.psidbits)) + psidshift = 128 - ( self.rulev6mask + self.ealen ) + return [addroffset,psidshift] + + def _append_map_eabits(self,ipv4index,addroffset,psidshift,psid): + rulev6base = IPv6Network(self.rulev6,strict=False).network_address + map_prefix = int(rulev6base) | ( ipv4index << addroffset ) + map_fullprefix = map_prefix | ( psid << psidshift) + return map_fullprefix + + + def get_mapce_addr(self,ipv4addr,psid): + ipv4index = self.ipv4_index(ipv4addr) + (addroffset,psidshift) = self._calc_ipv6bit_pos() + map_fullprefix = self._append_map_eabits(ipv4index, + addroffset, + psidshift, + psid) + mapv4iid = map_fullprefix | ( int(self.ipv4addr) << 16 ) + map_full_address = mapv4iid | psid + mapce_address = "{}".format(IPv6Address(map_full_address)) + return mapce_address + + def get_mapce_prefix(self,ipv4addr,psid): + ipv4index = self.ipv4_index(ipv4addr) + (addroffset,psidshift) = self._calc_ipv6bit_pos() + map_fullprefix = self._append_map_eabits(ipv4index, + addroffset, + psidshift, + psid) + mapce_prefix = "{}/{}".format( + IPv6Address(map_fullprefix), + self.rulev6mask + self.ealen + ) + return mapce_prefix + + def get_map_ipv4(self,mapce_address): + ipv4 = (int(IPv6Address(mapce_address)) & ( 0xffffffff << 16 )) >> 16 + return ip_address(ipv4) + + + +class DmrCalc(object): + + def __init__(self,dmr): + + # Validate and set BMR and BMR derived values + self.dmrprefix = self._check_dmr_prefix(dmr) + + def embed_6052addr(self,ipv4addr): + + try: + ipv4addrint = int(ip_address(ipv4addr)) + except ValueError: + print("Invalid IPv4 address {}".format(ipv4addr)) + sys.exit(1) + + if ( self.dmrprefix.prefixlen == 64 ): + ipv6int = ipv4addrint << 24 + ipv6int += int(self.dmrprefix.network_address) + return IPv6Address(ipv6int) + + if ( self.dmrprefix.prefixlen == 96 ): + ipv6int = ipv4addrint + ipv6int += int(self.dmrprefix.network_address) + return IPv6Address(ipv6int) + + def _check_dmr_prefix(self,dmrprefix): + try: + self.dmrmask = IPv6Network( + dmrprefix, + strict=False + ).prefixlen + except ValueError: + print("Invalid IPv6 prefix {}".format(prefix)) + sys.exit(1) + + if self.dmrmask not in (32,40,48,56,64,96): + print("Invalid prefix mask /{}".format(self.dmrmask)) + sys.exit(1) + + return IPv6Network(dmrprefix) + +if __name__ == 
"__main__": + m = DmrCalc('fd80::/48') + print(m.dmrprefix) diff --git a/vnet/vnet/map/examples/mt-test.py b/vnet/vnet/map/examples/mt-test.py new file mode 100644 index 00000000000..62d269c7a13 --- /dev/null +++ b/vnet/vnet/map/examples/mt-test.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +# Copyright (c) 2009-2014 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import threading +import time +from scapy.all import * +from Queue import * + +iface = 'veth1' + +class SnifferThread(threading.Thread) : + def __init__(self,q,iface,flt,timeout) : + threading.Thread.__init__(self) + self.q = q + self.iface = iface + self.timeout = timeout + self.flt = flt + print("Sniffers reporting for service on ",self.iface) + + def run(self) : + conf.iface=self.iface + conf.iface6=self.iface + + r = sniff(filter=self.flt,iface=self.iface,timeout=self.timeout,prn=lambda x: x.summary()) + self.q.put(r) + + + +# New "SR" function +# Fire off thread with filter and expected answer packet(s). +# Fire off sniffer thread, main thread sends packet +# Returns true if found + +def sr2(answer, *args, **kwargs): + q = Queue() + print("Creating SnifferThreadWorkerThread") + flt='ip proto 41' + iface='veth1' + sniffer = SnifferThread(q,iface,flt,1) + sniffer.setDaemon(True) + sniffer.start() + + print "Sending packet:" + send(*args, **kwargs) + sniffer.join() + ps = q.get() + +# ps.summary() + print "Number of packets sniffed:", len(ps) + + for p in ps: + ip = p.getlayer(1) + print "Comparing", ip.summary(), "and", answer.summary() + if ip == answer: + print "We have a match!!" + return True + return False + +aip6 = IPv6(dst='2002:0a0a:0a0a::12')/ICMPv6EchoRequest() +answer= IP(src="10.0.0.100",dst="10.10.10.10",ttl=63)/aip6 +packet = IPv6(dst='2002:0a0a:0a0a::12')/ICMPv6EchoRequest() + +# From IPv6 +sr2(answer, packet,iface='veth1') + +#From IPv4 +packet = IP(src='10.10.10.10',dst='10.0.0.100')/IPv6(src='2002:0a0a:0a0a::12',dst='1::2')/ICMPv6EchoRequest() +sr2(answer, packet,iface='veth1') diff --git a/vnet/vnet/map/gen-rules.py b/vnet/vnet/map/gen-rules.py new file mode 100755 index 00000000000..533a8e237f7 --- /dev/null +++ b/vnet/vnet/map/gen-rules.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python + +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import ipaddress +import argparse +import sys + +# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6 +# map add rule index <0> psid <psid> ip6-dst <ip6-dst> + +parser = argparse.ArgumentParser(description='MAP VPP configuration generator') +parser.add_argument('-t', action="store", dest="mapmode") +args = parser.parse_args() + +# +# 1:1 Shared IPv4 address, shared BR +# +def shared11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + + +# +# 1:1 Shared IPv4 address +# +def shared11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Shared IPv4 address small +# +def smallshared11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/24') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Full IPv4 address +# +def full11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 0 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 0 psid-len 0") +def full11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 0 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 0 psid-len 0") + +# +# Algorithmic mapping Shared IPv4 address +# +def algo(): + print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8") + print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0") + +# +# IP4 forwarding +# +def ip4(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + for i in range(ip4_pfx.num_addresses): + print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2") + + +globals()[args.mapmode]() + + diff --git a/vnet/vnet/map/ip4_map.c b/vnet/vnet/map/ip4_map.c new file mode 100644 index 00000000000..cf53ef4918c --- /dev/null +++ b/vnet/vnet/map/ip4_map.c @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Defines used for testing various optimisation schemes + */ +#define MAP_ENCAP_DUAL 0 + +#include "map.h" +#include "../ip/ip_frag.h" + +vlib_node_registration_t ip4_map_reass_node; + +enum ip4_map_next_e { + IP4_MAP_NEXT_IP6_LOOKUP, +#ifdef MAP_SKIP_IP6_LOOKUP + IP4_MAP_NEXT_IP6_REWRITE, +#endif + IP4_MAP_NEXT_FRAGMENT, + IP4_MAP_NEXT_REASS, + IP4_MAP_NEXT_DROP, + IP4_MAP_N_NEXT, +}; + +enum ip4_map_reass_next_t { + IP4_MAP_REASS_NEXT_IP6_LOOKUP, + IP4_MAP_REASS_NEXT_IP4_FRAGMENT, + IP4_MAP_REASS_NEXT_DROP, + IP4_MAP_REASS_N_NEXT, +}; + +typedef struct { + u32 map_domain_index; + u16 port; + u8 cached; +} map_ip4_map_reass_trace_t; + +u8 * +format_ip4_map_reass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *); + return format(s, "MAP domain index: %d L4 port: %u Status: %s", t->map_domain_index, + t->port, t->cached?"cached":"forwarded"); +} + +/* + * ip4_map_get_port + */ +u16 +ip4_map_get_port (ip4_header_t *ip, map_dir_e dir) +{ + /* Find port information */ + if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) { + udp_header_t *udp = (void *)(ip + 1); + return (dir == MAP_SENDER ? udp->src_port : udp->dst_port); + } else if (ip->protocol == IP_PROTOCOL_ICMP) { + /* + * 1) ICMP Echo request or Echo reply + * 2) ICMP Error with inner packet being UDP or TCP + * 3) ICMP Error with inner packet being ICMP Echo request or Echo reply + */ + icmp46_header_t *icmp = (void *)(ip + 1); + if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) { + return *((u16 *)(icmp + 1)); + } else if (clib_net_to_host_u16(ip->length) >= 64) { // IP + ICMP + IP + L4 header + ip4_header_t *icmp_ip = (ip4_header_t *)(icmp + 2); + if (PREDICT_TRUE((icmp_ip->protocol == IP_PROTOCOL_TCP) || + (icmp_ip->protocol == IP_PROTOCOL_UDP))) { + udp_header_t *udp = (void *)(icmp_ip + 1); + return (dir == MAP_SENDER ? 
udp->dst_port : udp->src_port); + } else if (icmp_ip->protocol == IP_PROTOCOL_ICMP) { + icmp46_header_t *inner_icmp = (void *)(icmp_ip + 1); + if (inner_icmp->type == ICMP4_echo_request || inner_icmp->type == ICMP4_echo_reply) + return (*((u16 *)(inner_icmp + 1))); + } + } + } + return (0); +} + +static_always_inline u16 +ip4_map_port_and_security_check (map_domain_t *d, ip4_header_t *ip, u32 *next, u8 *error) +{ + u16 port = 0; + + if (d->psid_length > 0) { + if (!ip4_is_fragment(ip)) { + if (PREDICT_FALSE((ip->ip_version_and_header_length != 0x45) || clib_host_to_net_u16(ip->length) < 28)) { + return 0; + } + port = ip4_map_get_port(ip, MAP_RECEIVER); + if (port) { + /* Verify that port is not among the well-known ports */ + if ((d->psid_offset > 0) && (clib_net_to_host_u16(port) < (0x1 << (16 - d->psid_offset)))) { + *error = MAP_ERROR_ENCAP_SEC_CHECK; + } else { + return (port); + } + } else { + *error = MAP_ERROR_BAD_PROTOCOL; + } + } else { + *next = IP4_MAP_NEXT_REASS; + } + } + return (0); +} + +/* + * ip4_map_vtcfl + */ +static_always_inline u32 +ip4_map_vtcfl (ip4_header_t *ip4, vlib_buffer_t *p) +{ + map_main_t *mm = &map_main; + u8 tc = mm->tc_copy ? ip4->tos : mm->tc; + u32 vtcfl = 0x6 << 28; + vtcfl |= tc << 20; + vtcfl |= vnet_buffer(p)->ip.flow_hash && 0x000fffff; + + return (clib_host_to_net_u32(vtcfl)); +} + +static_always_inline bool +ip4_map_ip6_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip) +{ +#ifdef MAP_SKIP_IP6_LOOKUP + map_main_t *mm = &map_main; + u32 adj_index0 = mm->adj6_index; + if (adj_index0 > 0) { + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm6, mm->adj6_index); + if (adj->n_adj > 1) { + u32 hash_c0 = ip4_compute_flow_hash(ip, IP_FLOW_HASH_DEFAULT); + adj_index0 += (hash_c0 & (adj->n_adj - 1)); + } + vnet_buffer(p0)->ip.adj_index[VLIB_TX] = adj_index0; + return (true); + } +#endif + return (false); +} + +/* + * ip4_map + */ +static uword +ip4_map (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Dual loop */ + while (n_left_from > 4 && n_left_to_next > 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + map_domain_t *d0, *d1; + u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE; + ip4_header_t *ip40, *ip41; + u16 port0 = 0, port1 = 0; + ip6_header_t *ip6h0, *ip6h1; + u32 map_domain_index0 = ~0, map_domain_index1 = ~0; + u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 = IP4_MAP_NEXT_IP6_LOOKUP; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer(vm, from[2]); + p3 = vlib_get_buffer(vm, from[3]); + + vlib_prefetch_buffer_header(p2, STORE); + vlib_prefetch_buffer_header(p3, STORE); + /* IPv4 + 8 = 28. 
possibly plus -40 */ + CLIB_PREFETCH (p2->data-40, 68, STORE); + CLIB_PREFETCH (p3->data-40, 68, STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next +=2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip40 = vlib_buffer_get_current(p0); + ip41 = vlib_buffer_get_current(p1); + p0->current_length = clib_net_to_host_u16(ip40->length); + p1->current_length = clib_net_to_host_u16(ip41->length); + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0); + d1 = ip4_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], &map_domain_index1); + ASSERT(d0); + ASSERT(d1); + + /* + * Shared IPv4 address + */ + port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0); + port1 = ip4_map_port_and_security_check(d1, ip41, &next1, &error1); + + /* MAP calc */ + u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32); + u32 da41 = clib_net_to_host_u32(ip41->dst_address.as_u32); + u16 dp40 = clib_net_to_host_u16(port0); + u16 dp41 = clib_net_to_host_u16(port1); + u64 dal60 = map_get_pfx(d0, da40, dp40); + u64 dal61 = map_get_pfx(d1, da41, dp41); + u64 dar60 = map_get_sfx(d0, da40, dp40); + u64 dar61 = map_get_sfx(d1, da41, dp41); + if (dal60 == 0 && dar60 == 0) error0 = MAP_ERROR_UNKNOWN; + if (dal61 == 0 && dar61 == 0) error1 = MAP_ERROR_UNKNOWN; + + /* construct ipv6 header */ + vlib_buffer_advance(p0, - sizeof(ip6_header_t)); + vlib_buffer_advance(p1, - sizeof(ip6_header_t)); + ip6h0 = vlib_buffer_get_current(p0); + ip6h1 = vlib_buffer_get_current(p1); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0; + vnet_buffer(p1)->sw_if_index[VLIB_TX] = (u32)~0; + + ip6h0->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip40, p0); + ip6h1->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip41, p1); + ip6h0->payload_length = ip40->length; + ip6h1->payload_length = ip41->length; + ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; + ip6h1->protocol = IP_PROTOCOL_IP_IN_IP; + ip6h0->hop_limit = 0x40; + ip6h1->hop_limit = 0x40; + ip6h0->src_address = d0->ip6_src; + ip6h1->src_address = d1->ip6_src; + ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64(dal60); + ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64(dar60); + ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64(dal61); + ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64(dar61); + + /* + * Determine next node. Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { + if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0); + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + next0 = IP4_MAP_NEXT_FRAGMENT; + } else { + next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0; + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip6h0->payload_length) + 40); + } + } else { + next0 = IP4_MAP_NEXT_DROP; + } + + /* + * Determine next node. 
Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE(error1 == MAP_ERROR_NONE)) { + if (PREDICT_FALSE(d1->mtu && (clib_net_to_host_u16(ip6h1->payload_length) + sizeof(*ip6h1) > d1->mtu))) { + vnet_buffer(p1)->ip_frag.header_offset = sizeof(*ip6h1); + vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer(p1)->ip_frag.mtu = d1->mtu; + vnet_buffer(p1)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + next1 = IP4_MAP_NEXT_FRAGMENT; + } else { + next1 = ip4_map_ip6_lookup_bypass(p1, ip41) ? IP4_MAP_NEXT_IP6_REWRITE : next1; + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index1, 1, + clib_net_to_host_u16(ip6h1->payload_length) + 40); + } + } else { + next1 = IP4_MAP_NEXT_DROP; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p1, sizeof(*tr)); + tr->map_domain_index = map_domain_index1; + tr->port = port1; + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + map_domain_t *d0; + u8 error0 = MAP_ERROR_NONE; + ip4_header_t *ip40; + u16 port0 = 0; + ip6_header_t *ip6h0; + u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP; + u32 map_domain_index0 = ~0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip40 = vlib_buffer_get_current(p0); + p0->current_length = clib_net_to_host_u16(ip40->length); + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0); + ASSERT(d0); + + /* + * Shared IPv4 address + */ + port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0); + + /* MAP calc */ + u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32); + u16 dp40 = clib_net_to_host_u16(port0); + u64 dal60 = map_get_pfx(d0, da40, dp40); + u64 dar60 = map_get_sfx(d0, da40, dp40); + if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) error0 = MAP_ERROR_UNKNOWN; + + /* construct ipv6 header */ + vlib_buffer_advance(p0, - (sizeof(ip6_header_t))); + ip6h0 = vlib_buffer_get_current(p0); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0; + + ip6h0->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip40, p0); + ip6h0->payload_length = ip40->length; + ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; + ip6h0->hop_limit = 0x40; + ip6h0->src_address = d0->ip6_src; + ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64(dal60); + ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64(dar60); + + /* + * Determine next node. Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { + if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0); + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + next0 = IP4_MAP_NEXT_FRAGMENT; + } else { + next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? 
IP4_MAP_NEXT_IP6_REWRITE : next0; + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip6h0->payload_length) + 40); + } + } else { + next0 = IP4_MAP_NEXT_DROP; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* + * ip4_map_reass + */ +static uword +ip4_map_reass (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_reass_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 cpu_index = os_get_cpu_number(); + u32 *fragments_to_drop = NULL; + u32 *fragments_to_loopback = NULL; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + map_domain_t *d0; + u8 error0 = MAP_ERROR_NONE; + ip4_header_t *ip40; + i32 port0 = 0; + ip6_header_t *ip60; + u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP; + u32 map_domain_index0; + u8 cached = 0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + ip40 = (ip4_header_t *)(ip60 + 1); + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0); + + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(ip40->src_address.as_u32, ip40->dst_address.as_u32, + ip40->fragment_id, ip40->protocol, &fragments_to_drop); + if (PREDICT_FALSE(!r)) { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } else if (PREDICT_TRUE(ip4_get_fragment_offset(ip40))) { + if (r->port >= 0) { + // We know the port already + port0 = r->port; + } else if (map_ip4_reass_add_fragment(r, pi0)) { + // Not enough space for caching + error0 = MAP_ERROR_FRAGMENT_MEMORY; + map_ip4_reass_free(r, &fragments_to_drop); + } else { + cached = 1; + } + } else if ((port0 = ip4_get_port(ip40, MAP_RECEIVER, p0->current_length)) < 0) { + // Could not find port. We'll free the reassembly. 
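+        // Editor's note (commentary added for clarity, not in the original
+        // source): this is the first-fragment path. The L4 port is what
+        // selects the MAP destination IPv6 address, so if it cannot be
+        // parsed there is no way to translate this datagram; the reassembly
+        // state and any fragments already cached for it are released below.
+        // Non-first fragments either reuse r->port (already learned) or are
+        // cached until the first fragment arrives.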
+ error0 = MAP_ERROR_BAD_PROTOCOL; + port0 = 0; + map_ip4_reass_free(r, &fragments_to_drop); + } else { + r->port = port0; + map_ip4_reass_get_fragments(r, &fragments_to_loopback); + } + +#ifdef MAP_IP4_REASS_COUNT_BYTES + if (!cached && r) { + r->forwarded += clib_host_to_net_u16(ip40->length) - 20; + if (!ip4_get_fragment_more(ip40)) + r->expected_total = ip4_get_fragment_offset(ip40) * 8 + clib_host_to_net_u16(ip40->length) - 20; + if(r->forwarded >= r->expected_total) + map_ip4_reass_free(r, &fragments_to_drop); + } +#endif + + map_ip4_reass_unlock(); + + // NOTE: Most operations have already been performed by ip4_map + // All we need is the right destination address + ip60->dst_address.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, port0); + ip60->dst_address.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, port0); + + if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip60->payload_length) + sizeof(*ip60) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip60); + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_ip4_map_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + tr->cached = cached; + } + + if(cached) { + //Dequeue the packet + n_left_to_next++; + to_next--; + } else { + if (error0 == MAP_ERROR_NONE) + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip60->payload_length) + 40); + next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP4_MAP_REASS_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + + //Loopback when we reach the end of the inpu vector + if(n_left_from == 0 && vec_len(fragments_to_loopback)) { + from = vlib_frame_vector_args(frame); + u32 len = vec_len(fragments_to_loopback); + if(len <= VLIB_FRAME_SIZE) { + memcpy(from, fragments_to_loopback, sizeof(u32)*len); + n_left_from = len; + vec_reset_length(fragments_to_loopback); + } else { + memcpy(from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof(u32)*VLIB_FRAME_SIZE); + n_left_from = VLIB_FRAME_SIZE; + _vec_len(fragments_to_loopback) = len - VLIB_FRAME_SIZE; + } + } + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + map_send_all_to_node(vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP4_MAP_REASS_NEXT_DROP); + + vec_free(fragments_to_drop); + vec_free(fragments_to_loopback); + return frame->n_vectors; +} + +static char *map_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip4_map_node) = { + .function = ip4_map, + .name = "ip4-map", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = IP4_MAP_N_NEXT, + .next_nodes = { + [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup", +#ifdef MAP_SKIP_IP6_LOOKUP + [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-rewrite", +#endif + [IP4_MAP_NEXT_FRAGMENT] = "ip4-frag", + [IP4_MAP_NEXT_REASS] = "ip4-map-reass", + [IP4_MAP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip4_map_reass_node) = { + .function = ip4_map_reass, + .name = 
"ip4-map-reass", + .vector_size = sizeof(u32), + .format_trace = format_ip4_map_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = IP4_MAP_REASS_N_NEXT, + .next_nodes = { + [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP4_MAP_REASS_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip4_map_t.c b/vnet/vnet/map/ip4_map_t.c new file mode 100644 index 00000000000..07f5b19c257 --- /dev/null +++ b/vnet/vnet/map/ip4_map_t.c @@ -0,0 +1,1092 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "map.h" + +#include "../ip/ip_frag.h" + +#define IP4_MAP_T_DUAL_LOOP 1 + +typedef enum { + IP4_MAPT_NEXT_MAPT_TCP_UDP, + IP4_MAPT_NEXT_MAPT_ICMP, + IP4_MAPT_NEXT_MAPT_FRAGMENTED, + IP4_MAPT_NEXT_DROP, + IP4_MAPT_N_NEXT +} ip4_mapt_next_t; + +typedef enum { + IP4_MAPT_ICMP_NEXT_IP6_LOOKUP, + IP4_MAPT_ICMP_NEXT_IP6_FRAG, + IP4_MAPT_ICMP_NEXT_DROP, + IP4_MAPT_ICMP_N_NEXT +} ip4_mapt_icmp_next_t; + +typedef enum { + IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP, + IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG, + IP4_MAPT_TCP_UDP_NEXT_DROP, + IP4_MAPT_TCP_UDP_N_NEXT +} ip4_mapt_tcp_udp_next_t; + +typedef enum { + IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP, + IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG, + IP4_MAPT_FRAGMENTED_NEXT_DROP, + IP4_MAPT_FRAGMENTED_N_NEXT +} ip4_mapt_fragmented_next_t; + +//This is used to pass information within the buffer data. +//Buffer structure being too small to contain big structures like this. +typedef CLIB_PACKED(struct { + ip6_address_t daddr; + ip6_address_t saddr; + //IPv6 header + Fragmentation header will be here + //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4) + u8 unused[28]; +}) ip4_mapt_pseudo_header_t; + +#define frag_id_4to6(id) (id) + +//TODO: Find the right place in memory for this. +static u8 icmp_to_icmp6_updater_pointer_table[] = + { 0, 1, 4, 4,~0, + ~0,~0,~0, 7, 6, + ~0,~0, 8, 8, 8, + 8, 24, 24, 24, 24 }; + + +static_always_inline int +ip4_map_fragment_cache (ip4_header_t *ip4, u16 port) +{ + u32 *ignore = NULL; + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(ip4->src_address.as_u32, ip4->dst_address.as_u32, + ip4->fragment_id, + (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, + &ignore); + if (r) + r->port = port; + + map_ip4_reass_unlock(); + return !r; +} + +static_always_inline i32 +ip4_map_fragment_get_port (ip4_header_t *ip4) +{ + u32 *ignore = NULL; + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(ip4->src_address.as_u32, ip4->dst_address.as_u32, + ip4->fragment_id, + (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, + &ignore); + i32 ret = r?r->port:-1; + map_ip4_reass_unlock(); + return ret; +} + + +/* Statelessly translates an ICMP packet into ICMPv6. + * + * Warning: The checksum will need to be recomputed. 
+ * + */ +static_always_inline int +ip4_icmp_to_icmp6_in_place (icmp46_header_t *icmp, u32 icmp_len, + i32 *receiver_port, ip4_header_t **inner_ip4) +{ + *inner_ip4 = NULL; + switch (icmp->type) { + case ICMP4_echo_reply: + *receiver_port = ((u16 *)icmp)[2]; + icmp->type = ICMP6_echo_reply; + break; + case ICMP4_echo_request: + *receiver_port = ((u16 *)icmp)[2]; + icmp->type = ICMP6_echo_request; + break; + case ICMP4_destination_unreachable: + *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8); + *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8); + + switch (icmp->code) { + case ICMP4_destination_unreachable_destination_unreachable_net: //0 + case ICMP4_destination_unreachable_destination_unreachable_host: //1 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_no_route_to_destination; + break; + case ICMP4_destination_unreachable_protocol_unreachable: //2 + icmp->type = ICMP6_parameter_problem; + icmp->code = ICMP6_parameter_problem_unrecognized_next_header; + break; + case ICMP4_destination_unreachable_port_unreachable: //3 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_port_unreachable; + break; + case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4 + icmp->type = ICMP6_packet_too_big; + icmp->code = 0; + { + u32 advertised_mtu = clib_net_to_host_u32(*((u32 *)(icmp + 1))); + if (advertised_mtu) + advertised_mtu += 20; + else + advertised_mtu = 1000; //FIXME ! (RFC 1191 - plateau value) + + //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20) + *((u32 *)(icmp + 1)) = clib_host_to_net_u32(advertised_mtu); + } + break; + + case ICMP4_destination_unreachable_source_route_failed: //5 + case ICMP4_destination_unreachable_destination_network_unknown: //6 + case ICMP4_destination_unreachable_destination_host_unknown: //7 + case ICMP4_destination_unreachable_source_host_isolated: //8 + case ICMP4_destination_unreachable_network_unreachable_for_type_of_service: //11 + case ICMP4_destination_unreachable_host_unreachable_for_type_of_service: //12 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_no_route_to_destination; + break; + case ICMP4_destination_unreachable_network_administratively_prohibited: //9 + case ICMP4_destination_unreachable_host_administratively_prohibited: //10 + case ICMP4_destination_unreachable_communication_administratively_prohibited: //13 + case ICMP4_destination_unreachable_precedence_cutoff_in_effect: //15 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_destination_administratively_prohibited; + break; + case ICMP4_destination_unreachable_host_precedence_violation: //14 + default: + return -1; + } + break; + + case ICMP4_time_exceeded: //11 + *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8); + *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8); + icmp->type = ICMP6_time_exceeded; + //icmp->code = icmp->code //unchanged + break; + + case ICMP4_parameter_problem: + *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8); + *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8); + + switch (icmp->code) { + case ICMP4_parameter_problem_pointer_indicates_error: + case ICMP4_parameter_problem_bad_length: + icmp->type = ICMP6_parameter_problem; + icmp->code = ICMP6_parameter_problem_erroneous_header_field; + { + u8 ptr = icmp_to_icmp6_updater_pointer_table[*((u8 *)(icmp + 1))]; + if (ptr == 0xff) + return -1; + + 
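+          // Editor's note (added commentary, not in the original source):
+          // icmp_to_icmp6_updater_pointer_table implements the RFC 6145
+          // pointer remapping for Parameter Problem messages, e.g. an IPv4
+          // pointer of 8 (TTL) becomes 7 (hop limit), 9 (protocol) becomes
+          // 6 (next header), and 12..15 / 16..19 (addresses) become 8 / 24.
+          // Entries of 0xff mark fields with no IPv6 counterpart, which is
+          // why the packet is dropped above in that case.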
*((u32 *)(icmp + 1)) = clib_host_to_net_u32(ptr); + } + break; + default: + //All other codes cause dropping the packet + return -1; + } + break; + + default: + //All other types cause dropping the packet + return -1; + break; + } + return 0; +} + +static_always_inline void +_ip4_map_t_icmp (map_domain_t *d, vlib_buffer_t *p, u8 *error) +{ + ip4_header_t *ip4, *inner_ip4; + ip6_header_t *ip6, *inner_ip6; + u32 ip_len; + icmp46_header_t *icmp; + i32 recv_port; + ip_csum_t csum; + u16 *inner_L4_checksum = 0; + ip6_frag_hdr_t *inner_frag; + u32 inner_frag_id; + u32 inner_frag_offset; + u8 inner_frag_more; + + ip4 = vlib_buffer_get_current(p); + ip_len = clib_net_to_host_u16(ip4->length); + ASSERT(ip_len <= p->current_length); + + icmp = (icmp46_header_t *)(ip4 + 1); + if (ip4_icmp_to_icmp6_in_place(icmp, ip_len - sizeof(*ip4), + &recv_port, &inner_ip4)) { + *error = MAP_ERROR_ICMP; + return; + } + + if (recv_port < 0) { + // In case of 1:1 mapping, we don't care about the port + if(d->ea_bits_len == 0 && d->rules) { + recv_port = 0; + } else { + *error = MAP_ERROR_ICMP; + return; + } + } + + if (inner_ip4) { + //We have 2 headers to translate. + //We need to make some room in the middle of the packet + + if (PREDICT_FALSE(ip4_is_fragment(inner_ip4))) { + //Here it starts getting really tricky + //We will add a fragmentation header in the inner packet + + if (!ip4_is_first_fragment(inner_ip4)) { + //For now we do not handle unless it is the first fragment + //Ideally we should handle the case as we are in slow path already + *error = MAP_ERROR_FRAGMENTED; + return; + } + + vlib_buffer_advance(p, - 2*(sizeof(*ip6) - sizeof(*ip4)) - sizeof(*inner_frag)); + ip6 = vlib_buffer_get_current(p); + memcpy(u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)), ip4, 20 + 8); + ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)); + icmp = (icmp46_header_t *) (ip4 + 1); + + inner_ip6 = (ip6_header_t *) u8_ptr_add(inner_ip4, sizeof(*ip4) - sizeof(*ip6) - sizeof(*inner_frag)); + inner_frag = (ip6_frag_hdr_t *) u8_ptr_add(inner_ip6, sizeof(*inner_ip6)); + ip6->payload_length = u16_net_add(ip4->length, sizeof(*ip6) - 2*sizeof(*ip4) + sizeof(*inner_frag)); + inner_frag_id = frag_id_4to6(inner_ip4->fragment_id); + inner_frag_offset = ip4_get_fragment_offset(inner_ip4); + inner_frag_more = !!(inner_ip4->flags_and_fragment_offset & clib_net_to_host_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS)); + } else { + vlib_buffer_advance(p, - 2*(sizeof(*ip6) - sizeof(*ip4))); + ip6 = vlib_buffer_get_current(p); + memcpy(u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)), ip4, 20 + 8); + ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)); + icmp = (icmp46_header_t *) u8_ptr_add(ip4, sizeof(*ip4)); + inner_ip6 = (ip6_header_t *) u8_ptr_add(inner_ip4, sizeof(*ip4) - sizeof(*ip6)); + ip6->payload_length = u16_net_add(ip4->length, sizeof(*ip6) - 2*sizeof(*ip4)); + inner_frag = NULL; + } + + if (PREDICT_TRUE(inner_ip4->protocol == IP_PROTOCOL_TCP)) { + inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum; + *inner_L4_checksum = ip_csum_fold(ip_csum_sub_even(*inner_L4_checksum, *((u64 *) (&inner_ip4->src_address)))); + } else if (PREDICT_TRUE(inner_ip4->protocol == IP_PROTOCOL_UDP)) { + inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum; + if (!*inner_L4_checksum) { + //The inner packet was first translated, and therefore came from IPv6. 
+ //As the packet was an IPv6 packet, the UDP checksum can't be NULL + *error = MAP_ERROR_ICMP; + return; + } + *inner_L4_checksum = ip_csum_fold(ip_csum_sub_even(*inner_L4_checksum, *((u64 *)(&inner_ip4->src_address)))); + } else if (inner_ip4->protocol == IP_PROTOCOL_ICMP) { + //We have an ICMP inside an ICMP + //It needs to be translated, but not for error ICMP messages + icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1); + csum = inner_icmp->checksum; + //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by ip4_icmp_to_icmp6_in_place + csum = ip_csum_sub_even(csum, *((u16 *)inner_icmp)); + inner_icmp->type = (inner_icmp->type == ICMP4_echo_request)? + ICMP6_echo_request:ICMP6_echo_reply; + csum = ip_csum_add_even(csum, *((u16 *)inner_icmp)); + csum = ip_csum_add_even(csum, clib_host_to_net_u16(IP_PROTOCOL_ICMP6)); + csum = ip_csum_add_even(csum, inner_ip4->length - sizeof(*inner_ip4)); + inner_icmp->checksum = ip_csum_fold(csum); + inner_L4_checksum = &inner_icmp->checksum; + inner_ip4->protocol = IP_PROTOCOL_ICMP6; + } else { + ASSERT(0); // We had a port from that, so it is udp or tcp or ICMP + } + + //FIXME: Security check with the port found in the inner packet + + csum = *inner_L4_checksum; //Initial checksum of the inner L4 header + //FIXME: Shouldn't we remove ip addresses from there ? + + inner_ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (inner_ip4->tos << 20)); + inner_ip6->payload_length = u16_net_add(inner_ip4->length, - sizeof(*inner_ip4)); + inner_ip6->hop_limit = inner_ip4->ttl; + inner_ip6->protocol = inner_ip4->protocol; + + //Note that the source address is within the domain + //while the destination address is the one outside the domain + ip4_map_t_embedded_address(d, &inner_ip6->dst_address, &inner_ip4->dst_address); + inner_ip6->src_address.as_u64[0] = map_get_pfx_net(d, inner_ip4->src_address.as_u32, recv_port); + inner_ip6->src_address.as_u64[1] = map_get_sfx_net(d, inner_ip4->src_address.as_u32, recv_port); + + if (PREDICT_FALSE(inner_frag != NULL)) { + inner_frag->next_hdr = inner_ip6->protocol; + inner_frag->identification = inner_frag_id; + inner_frag->rsv = 0; + inner_frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(inner_frag_offset, inner_frag_more); + inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + inner_ip6->payload_length = clib_host_to_net_u16( + clib_net_to_host_u16(inner_ip6->payload_length) + sizeof(*inner_frag)); + } + + csum = ip_csum_add_even(csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_add_even(csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_add_even(csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even(csum, inner_ip6->dst_address.as_u64[1]); + *inner_L4_checksum = ip_csum_fold(csum); + + } else { + vlib_buffer_advance(p, sizeof(*ip4) - sizeof(*ip6)); + ip6 = vlib_buffer_get_current(p); + ip6->payload_length = clib_host_to_net_u16(clib_net_to_host_u16(ip4->length) - sizeof(*ip4)); + } + + //Translate outer IPv6 + ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip4->tos << 20)); + + ip6->hop_limit = ip4->ttl; + ip6->protocol = IP_PROTOCOL_ICMP6; + + ip4_map_t_embedded_address(d, &ip6->src_address, &ip4->src_address); + ip6->dst_address.as_u64[0] = map_get_pfx_net(d, ip4->dst_address.as_u32, recv_port); + ip6->dst_address.as_u64[1] = map_get_sfx_net(d, ip4->dst_address.as_u32, recv_port); + + //Truncate when the packet exceeds the minimal IPv6 MTU + if (p->current_length > 1280) { + 
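+    // Editor's note (added commentary, not in the original source): 1280 is
+    // the IPv6 minimum link MTU. RFC 4443 requires an ICMPv6 error message
+    // to fit within it, so the translated packet is truncated here instead
+    // of being fragmented.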
ip6->payload_length = clib_host_to_net_u16(1280 - sizeof(*ip6)); + p->current_length = 1280; //Looks too simple to be correct... + } + + //TODO: We could do an easy diff-checksum for echo requests/replies + //Recompute ICMP checksum + icmp->checksum = 0; + csum = ip_csum_with_carry(0, ip6->payload_length); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(ip6->protocol)); + csum = ip_csum_with_carry(csum, ip6->src_address.as_u64[0]); + csum = ip_csum_with_carry(csum, ip6->src_address.as_u64[1]); + csum = ip_csum_with_carry(csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_with_carry(csum, ip6->dst_address.as_u64[1]); + csum = ip_incremental_checksum(csum, icmp, clib_net_to_host_u16(ip6->payload_length)); + icmp->checksum = ~ip_csum_fold (csum); +} + +static uword +ip4_map_t_icmp (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_t_icmp_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip4_mapt_icmp_next_t next0; + u8 error0; + map_domain_t *d0; + u16 len0; + + next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP; + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + vlib_buffer_advance(p0, sizeof(ip4_mapt_pseudo_header_t)); //The pseudo-header is not used + len0 = clib_net_to_host_u16(((ip4_header_t *)vlib_buffer_get_current(p0))->length); + d0 = pool_elt_at_index(map_main.domains, vnet_buffer(p0)->map_t.map_domain_index); + _ip4_map_t_icmp(d0, p0, &error0); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG; + } + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + len0); + } else { + next0 = IP4_MAPT_ICMP_NEXT_DROP; + } + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static uword +ip4_map_t_fragmented (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip4_header_t *ip40; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + ip4_mapt_pseudo_header_t *pheader0; + ip4_mapt_fragmented_next_t next0; + + next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP; + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + + 
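+      // Editor's sketch (added commentary, not in the original source): at
+      // this point the buffer layout produced by the ip4-map-t classifier is
+      // roughly:
+      //
+      //   [ ip4_mapt_pseudo_header_t | IPv4 header | L4 / payload ]
+      //
+      // where the pseudo header carries the precomputed IPv6 source and
+      // destination addresses. The code below strips the pseudo header,
+      // builds an IPv6 header plus fragment header in place of the IPv4
+      // header (growing backwards into the pseudo-header area, since
+      // 40 + 8 bytes do not fit in 20), and copies the addresses from the
+      // pseudo header into the new IPv6 header.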
//Accessing pseudo header + pheader0 = vlib_buffer_get_current(p0); + vlib_buffer_advance(p0, sizeof(*pheader0)); + + //Accessing ip4 header + ip40 = vlib_buffer_get_current(p0); + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0)); + ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0) - sizeof(*ip60)); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + + //We know that the protocol was one of ICMP, TCP or UDP + //because the first fragment was found and cached + frag0->next_hdr = (ip40->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip40->protocol; + frag0->identification = frag_id_4to6(ip40->fragment_id); + frag0->rsv = 0; + frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more( + ip4_get_fragment_offset(ip40), + clib_net_to_host_u16(ip40->flags_and_fragment_offset) & IP4_HEADER_FLAG_MORE_FRAGMENTS); + + ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20)); + ip60->payload_length = clib_host_to_net_u16(clib_net_to_host_u16(ip40->length) - sizeof(*ip40) + sizeof(*frag0)); + ip60->hop_limit = ip40->ttl; + ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0]; + ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1]; + ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0]; + ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1]; + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static uword +ip4_map_t_tcp_udp(vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP4_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip4_header_t *ip40, *ip41; + ip6_header_t *ip60, *ip61; + ip_csum_t csum0, csum1; + u16 *checksum0, *checksum1; + ip6_frag_hdr_t *frag0, *frag1; + u32 frag_id0, frag_id1; + ip4_mapt_pseudo_header_t *pheader0, *pheader1; + ip4_mapt_tcp_udp_next_t next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next +=2; + n_left_to_next -= 2; + + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + + //Accessing pseudo header + pheader0 = vlib_buffer_get_current(p0); + pheader1 = vlib_buffer_get_current(p1); + vlib_buffer_advance(p0, sizeof(*pheader0)); + vlib_buffer_advance(p1, sizeof(*pheader1)); + + //Accessing ip4 header + ip40 = vlib_buffer_get_current(p0); + ip41 = vlib_buffer_get_current(p1); + checksum0 = (u16 *) u8_ptr_add(ip40, vnet_buffer(p0)->map_t.checksum_offset); + checksum1 = (u16 *) u8_ptr_add(ip41, vnet_buffer(p1)->map_t.checksum_offset); + + //UDP checksum is optional over IPv4 but mandatory for IPv6 + //We do not check udp->length sanity but 
use our safe computed value instead + if (PREDICT_FALSE(!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP)) { + u16 udp_len = clib_host_to_net_u16(ip40->length) - sizeof(*ip40); + udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip40, sizeof(*ip40)); + ip_csum_t csum; + csum = ip_incremental_checksum(0, udp, udp_len); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(udp_len)); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry(csum, *((u64 *)(&ip40->src_address))); + *checksum0 = ~ip_csum_fold(csum); + } + if (PREDICT_FALSE(!*checksum1 && ip41->protocol == IP_PROTOCOL_UDP)) { + u16 udp_len = clib_host_to_net_u16(ip41->length) - sizeof(*ip40); + udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip41, sizeof(*ip40)); + ip_csum_t csum; + csum = ip_incremental_checksum(0, udp, udp_len); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(udp_len)); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry(csum, *((u64 *)(&ip41->src_address))); + *checksum1 = ~ip_csum_fold(csum); + } + + csum0 = ip_csum_sub_even(*checksum0, ip40->src_address.as_u32); + csum1 = ip_csum_sub_even(*checksum1, ip41->src_address.as_u32); + csum0 = ip_csum_sub_even(csum0, ip40->dst_address.as_u32); + csum1 = ip_csum_sub_even(csum1, ip41->dst_address.as_u32); + + // Deal with fragmented packets + if (PREDICT_FALSE(ip40->flags_and_fragment_offset & + clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) { + ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0)); + frag_id0 = frag_id_4to6(ip40->fragment_id); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + } else { + ip60 = (ip6_header_t *) (((u8 *)ip40) + sizeof(*ip40) - sizeof(*ip60)); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60)); + frag0 = NULL; + } + + if (PREDICT_FALSE(ip41->flags_and_fragment_offset & + clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) { + ip61 = (ip6_header_t *) u8_ptr_add(ip41, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + frag1 = (ip6_frag_hdr_t *) u8_ptr_add(ip41, sizeof(*ip40) - sizeof(*frag0)); + frag_id1 = frag_id_4to6(ip41->fragment_id); + vlib_buffer_advance(p1, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + } else { + ip61 = (ip6_header_t *) (((u8 *)ip41) + sizeof(*ip40) - sizeof(*ip60)); + vlib_buffer_advance(p1, sizeof(*ip40) - sizeof(*ip60)); + frag1 = NULL; + } + + ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20)); + ip61->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip41->tos << 20)); + ip60->payload_length = u16_net_add(ip40->length, - sizeof(*ip40)); + ip61->payload_length = u16_net_add(ip41->length, - sizeof(*ip40)); + ip60->hop_limit = ip40->ttl; + ip61->hop_limit = ip41->ttl; + ip60->protocol = ip40->protocol; + ip61->protocol = ip41->protocol; + + if (PREDICT_FALSE(frag0 != NULL)) { + frag0->next_hdr = ip60->protocol; + frag0->identification = frag_id0; + frag0->rsv = 0; + frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1); + ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip60->payload_length = u16_net_add(ip60->payload_length, sizeof(*frag0)); + } + + if (PREDICT_FALSE(frag1 != NULL)) { + frag1->next_hdr = ip61->protocol; + frag1->identification = frag_id1; + frag1->rsv = 0; + frag1->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1); + ip61->protocol 
= IP_PROTOCOL_IPV6_FRAGMENTATION; + ip61->payload_length = u16_net_add(ip61->payload_length, sizeof(*frag0)); + } + + //Finally copying the address + ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0]; + ip61->dst_address.as_u64[0] = pheader1->daddr.as_u64[0]; + ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1]; + ip61->dst_address.as_u64[1] = pheader1->daddr.as_u64[1]; + ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0]; + ip61->src_address.as_u64[0] = pheader1->saddr.as_u64[0]; + ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1]; + ip61->src_address.as_u64[1] = pheader1->saddr.as_u64[1]; + + csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[0]); + csum1 = ip_csum_add_even(csum1, ip61->src_address.as_u64[0]); + csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[1]); + csum1 = ip_csum_add_even(csum1, ip61->src_address.as_u64[1]); + csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[0]); + csum1 = ip_csum_add_even(csum1, ip61->dst_address.as_u64[0]); + csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[1]); + csum1 = ip_csum_add_even(csum1, ip61->dst_address.as_u64[1]); + *checksum0 = ip_csum_fold(csum0); + *checksum1 = ip_csum_fold(csum1); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + + if(vnet_buffer(p1)->map_t.mtu < p1->current_length) { + vnet_buffer(p1)->ip_frag.header_offset = 0; + vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu; + vnet_buffer(p1)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, + to_next, n_left_to_next, pi0, pi1, + next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip4_header_t *ip40; + ip6_header_t *ip60; + ip_csum_t csum0; + u16 *checksum0; + ip6_frag_hdr_t *frag0; + u32 frag_id0; + ip4_mapt_pseudo_header_t *pheader0; + ip4_mapt_tcp_udp_next_t next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + p0 = vlib_get_buffer(vm, pi0); + + //Accessing pseudo header + pheader0 = vlib_buffer_get_current(p0); + vlib_buffer_advance(p0, sizeof(*pheader0)); + + //Accessing ip4 header + ip40 = vlib_buffer_get_current(p0); + checksum0 = (u16 *) u8_ptr_add(ip40, vnet_buffer(p0)->map_t.checksum_offset); + + //UDP checksum is optional over IPv4 but mandatory for IPv6 + //We do not check udp->length sanity but use our safe computed value instead + if (PREDICT_FALSE(!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP)) { + u16 udp_len = clib_host_to_net_u16(ip40->length) - sizeof(*ip40); + udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip40, sizeof(*ip40)); + ip_csum_t csum; + csum = ip_incremental_checksum(0, udp, udp_len); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(udp_len)); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry(csum, *((u64 *)(&ip40->src_address))); + *checksum0 = ~ip_csum_fold(csum); + } + + csum0 = ip_csum_sub_even(*checksum0, ip40->src_address.as_u32); + csum0 = ip_csum_sub_even(csum0, ip40->dst_address.as_u32); + + // Deal with fragmented packets + if (PREDICT_FALSE(ip40->flags_and_fragment_offset & + clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) 
{ + ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0)); + frag_id0 = frag_id_4to6(ip40->fragment_id); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + } else { + ip60 = (ip6_header_t *) (((u8 *)ip40) + sizeof(*ip40) - sizeof(*ip60)); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60)); + frag0 = NULL; + } + + ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20)); + ip60->payload_length = u16_net_add(ip40->length, - sizeof(*ip40)); + ip60->hop_limit = ip40->ttl; + ip60->protocol = ip40->protocol; + + if (PREDICT_FALSE(frag0 != NULL)) { + frag0->next_hdr = ip60->protocol; + frag0->identification = frag_id0; + frag0->rsv = 0; + frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1); + ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip60->payload_length = u16_net_add(ip60->payload_length, sizeof(*frag0)); + } + + //Finally copying the address + ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0]; + ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1]; + ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0]; + ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1]; + + csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[0]); + csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[1]); + csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[0]); + csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[1]); + *checksum0 = ip_csum_fold(csum0); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + //Send to fragmentation node if necessary + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static_always_inline void +ip4_map_t_classify(vlib_buffer_t *p0, map_domain_t *d0, ip4_header_t *ip40, u16 ip4_len0, + i32 *dst_port0, u8 *error0, ip4_mapt_next_t *next0) +{ + if (PREDICT_FALSE(ip4_get_fragment_offset(ip40))) { + *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED; + if(d0->ea_bits_len == 0 && d0->rules) { + *dst_port0 = 0; + } else { + *dst_port0 = ip4_map_fragment_get_port(ip40); + *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0; + } + } else if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_TCP)) { + vnet_buffer(p0)->map_t.checksum_offset = 36; + *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; + *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0; + *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 2)); + } else if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_UDP)) { + vnet_buffer(p0)->map_t.checksum_offset = 26; + *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; + *error0 = ip4_len0 < 28 ? 
MAP_ERROR_MALFORMED : *error0; + *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 2)); + } else if (ip40->protocol == IP_PROTOCOL_ICMP) { + *next0 = IP4_MAPT_NEXT_MAPT_ICMP; + if(d0->ea_bits_len == 0 && d0->rules) + *dst_port0 = 0; + else if (((icmp46_header_t *) u8_ptr_add(ip40, sizeof(*ip40)))->code == ICMP4_echo_reply || + ((icmp46_header_t *) u8_ptr_add(ip40, sizeof(*ip40)))->code == ICMP4_echo_request) + *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 6)); + } else { + *error0 = MAP_ERROR_BAD_PROTOCOL; + } +} + +static uword +ip4_map_t (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_t_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP4_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip4_header_t *ip40, *ip41; + map_domain_t *d0, *d1; + ip4_mapt_next_t next0, next1; + u16 ip4_len0, ip4_len1; + u8 error0, error1; + i32 dst_port0, dst_port1; + ip4_mapt_pseudo_header_t *pheader0, *pheader1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next +=2; + n_left_to_next -= 2; + error0 = MAP_ERROR_NONE; + error1 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip40 = vlib_buffer_get_current(p0); + ip41 = vlib_buffer_get_current(p1); + ip4_len0 = clib_host_to_net_u16(ip40->length); + ip4_len1 = clib_host_to_net_u16(ip41->length); + + if (PREDICT_FALSE(p0->current_length < ip4_len0 || + ip40->ip_version_and_header_length != 0x45)) { + error0 = MAP_ERROR_UNKNOWN; + next0 = IP4_MAPT_NEXT_DROP; + } + + if (PREDICT_FALSE(p1->current_length < ip4_len1 || + ip41->ip_version_and_header_length != 0x45)) { + error1 = MAP_ERROR_UNKNOWN; + next1 = IP4_MAPT_NEXT_DROP; + } + + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], + &vnet_buffer(p0)->map_t.map_domain_index); + d1 = ip4_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], + &vnet_buffer(p1)->map_t.map_domain_index); + + vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + vnet_buffer(p1)->map_t.mtu = d1->mtu ? 
d1->mtu : ~0; + + dst_port0 = -1; + dst_port1 = -1; + + ip4_map_t_classify(p0, d0, ip40, ip4_len0, &dst_port0, &error0, &next0); + ip4_map_t_classify(p1, d1, ip41, ip4_len1, &dst_port1, &error1, &next1); + + //Add MAP-T pseudo header in front of the packet + vlib_buffer_advance(p0, - sizeof(*pheader0)); + vlib_buffer_advance(p1, - sizeof(*pheader1)); + pheader0 = vlib_buffer_get_current(p0); + pheader1 = vlib_buffer_get_current(p1); + + //Save addresses within the packet + ip4_map_t_embedded_address(d0, &pheader0->saddr, &ip40->src_address); + ip4_map_t_embedded_address(d1, &pheader1->saddr, &ip41->src_address); + pheader0->daddr.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0); + pheader0->daddr.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0); + pheader1->daddr.as_u64[0] = map_get_pfx_net(d1, ip41->dst_address.as_u32, (u16)dst_port1); + pheader1->daddr.as_u64[1] = map_get_sfx_net(d1, ip41->dst_address.as_u32, (u16)dst_port1); + + if (PREDICT_FALSE(ip4_is_first_fragment(ip40) && (dst_port0 != -1) && + (d0->ea_bits_len != 0 || !d0->rules) && + ip4_map_fragment_cache(ip40, dst_port0))) { + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } + + if (PREDICT_FALSE(ip4_is_first_fragment(ip41) && (dst_port1 != -1) && + (d1->ea_bits_len != 0 || !d1->rules) && + ip4_map_fragment_cache(ip41, dst_port1))) { + error1 = MAP_ERROR_FRAGMENT_MEMORY; + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip40->length)); + } + + if (PREDICT_TRUE(error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, + vnet_buffer(p1)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip41->length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; + next1 = (error1 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next1; + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip4_header_t *ip40; + map_domain_t *d0; + ip4_mapt_next_t next0; + u16 ip4_len0; + u8 error0; + i32 dst_port0; + ip4_mapt_pseudo_header_t *pheader0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + ip40 = vlib_buffer_get_current(p0); + ip4_len0 = clib_host_to_net_u16(ip40->length); + if (PREDICT_FALSE(p0->current_length < ip4_len0 || + ip40->ip_version_and_header_length != 0x45)) { + error0 = MAP_ERROR_UNKNOWN; + next0 = IP4_MAPT_NEXT_DROP; + } + + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], + &vnet_buffer(p0)->map_t.map_domain_index); + + vnet_buffer(p0)->map_t.mtu = d0->mtu ? 
d0->mtu : ~0; + + dst_port0 = -1; + ip4_map_t_classify(p0, d0, ip40, ip4_len0, &dst_port0, &error0, &next0); + + //Add MAP-T pseudo header in front of the packet + vlib_buffer_advance(p0, - sizeof(*pheader0)); + pheader0 = vlib_buffer_get_current(p0); + + //Save addresses within the packet + ip4_map_t_embedded_address(d0, &pheader0->saddr, &ip40->src_address); + pheader0->daddr.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0); + pheader0->daddr.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0); + + //It is important to cache at this stage because the result might be necessary + //for packets within the same vector. + //Actually, this approach even provides some limited out-of-order fragments support + if (PREDICT_FALSE(ip4_is_first_fragment(ip40) && (dst_port0 != -1) && + (d0->ea_bits_len != 0 || !d0->rules) && + ip4_map_fragment_cache(ip40, dst_port0))) { + error0 = MAP_ERROR_UNKNOWN; + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip40->length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static char *map_t_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = { + .function = ip4_map_t_fragmented, + .name = "ip4-map-t-fragmented", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT, + .next_nodes = { + [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, + [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = { + .function = ip4_map_t_icmp, + .name = "ip4-map-t-icmp", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP4_MAPT_ICMP_N_NEXT, + .next_nodes = { + [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, + [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = { + .function = ip4_map_t_tcp_udp, + .name = "ip4-map-t-tcp-udp", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT, + .next_nodes = { + [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, + [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip4_map_t_node) = { + .function = ip4_map_t, + .name = "ip4-map-t", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP4_MAPT_N_NEXT, + .next_nodes = { + [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp", + [IP4_MAPT_NEXT_MAPT_ICMP] = 
"ip4-map-t-icmp", + [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented", + [IP4_MAPT_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip4_sixrd.c b/vnet/vnet/map/ip4_sixrd.c new file mode 100644 index 00000000000..1e83ce831e0 --- /dev/null +++ b/vnet/vnet/map/ip4_sixrd.c @@ -0,0 +1,127 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +#include "sixrd.h" + +vlib_node_registration_t ip4_sixrd_node; + +typedef enum { + IP4_SIXRD_NEXT_IP6_LOOKUP, + IP4_SIXRD_NEXT_DROP, + IP4_SIXRD_N_NEXT, +} ip4_sixrd_next_t; + +/* + * ip4_sixrd_sec_check + */ +static_always_inline void +ip4_sixrd_sec_check (sixrd_domain_t *d, ip4_address_t sa4, ip6_address_t sa6, u8 *error) +{ + u32 a = sixrd_get_addr(d, sa6.as_u64[0]); + clib_warning("Security check: %U %U", format_ip4_address, &a, format_ip4_address, &sa4); + if (PREDICT_FALSE(sixrd_get_addr(d, sa6.as_u64[0]) != sa4.as_u32)) + *error = SIXRD_ERROR_SEC_CHECK; +} + +/* + * ip4_sixrd + */ +static uword +ip4_sixrd (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_sixrd_node.index); + u32 decap = 0; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = SIXRD_ERROR_NONE; + sixrd_domain_t *d0 = 0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 sixrd_domain_index0 = ~0; + u32 next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip40 = vlib_buffer_get_current(p0); + + /* Throw away anything that isn't IP in IP. */ + if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_IPV6 && clib_net_to_host_u16(ip40->length) >= 60)) { + vlib_buffer_advance(p0, sizeof(ip4_header_t)); + ip60 = vlib_buffer_get_current(p0); + d0 = ip4_sixrd_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip6_address_t *)&ip60->src_address, + &sixrd_domain_index0, &error0); + } else { + error0 = SIXRD_ERROR_BAD_PROTOCOL; + } + if (d0) { + /* SIXRD inbound security check */ + ip4_sixrd_sec_check(d0, ip40->src_address, ip60->src_address, &error0); + } + + next0 = error0 == SIXRD_ERROR_NONE ? 
IP4_SIXRD_NEXT_IP6_LOOKUP : IP4_SIXRD_NEXT_DROP; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + sixrd_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->sixrd_domain_index = sixrd_domain_index0; + } + + p0->error = error_node->errors[error0]; + if (PREDICT_TRUE(error0 == SIXRD_ERROR_NONE)) decap++; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter(vm, ip4_sixrd_node.index, SIXRD_ERROR_DECAPSULATED, decap); + + return frame->n_vectors; +} + +static char *sixrd_error_strings[] = { +#define _(sym,string) string, + foreach_sixrd_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip4_sixrd_node) = { + .function = ip4_sixrd, + .name = "ip4-sixrd", + .vector_size = sizeof(u32), + .format_trace = format_sixrd_trace, + .n_errors = SIXRD_N_ERROR, + .error_strings = sixrd_error_strings, + .n_next_nodes = IP4_SIXRD_N_NEXT, + .next_nodes = { + [IP4_SIXRD_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_SIXRD_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip6_map.c b/vnet/vnet/map/ip6_map.c new file mode 100644 index 00000000000..e803af9007a --- /dev/null +++ b/vnet/vnet/map/ip6_map.c @@ -0,0 +1,966 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "map.h" + +#include "../ip/ip_frag.h" + +enum ip6_map_next_e { + IP6_MAP_NEXT_IP4_LOOKUP, +#ifdef MAP_SKIP_IP6_LOOKUP + IP6_MAP_NEXT_IP4_REWRITE, +#endif + IP6_MAP_NEXT_IP6_REASS, + IP6_MAP_NEXT_IP4_REASS, + IP6_MAP_NEXT_IP4_FRAGMENT, + IP6_MAP_NEXT_IP6_ICMP_RELAY, + IP6_MAP_NEXT_IP6_LOCAL, + IP6_MAP_NEXT_DROP, + IP6_MAP_N_NEXT, +}; + +enum ip6_map_ip6_reass_next_e { + IP6_MAP_IP6_REASS_NEXT_IP6_MAP, + IP6_MAP_IP6_REASS_NEXT_DROP, + IP6_MAP_IP6_REASS_N_NEXT, +}; + +enum ip6_map_ip4_reass_next_e { + IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP, + IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT, + IP6_MAP_IP4_REASS_NEXT_DROP, + IP6_MAP_IP4_REASS_N_NEXT, +}; + +enum ip6_icmp_relay_next_e { + IP6_ICMP_RELAY_NEXT_IP4_LOOKUP, + IP6_ICMP_RELAY_NEXT_DROP, + IP6_ICMP_RELAY_N_NEXT, +}; + +vlib_node_registration_t ip6_map_ip4_reass_node; +vlib_node_registration_t ip6_map_ip6_reass_node; +static vlib_node_registration_t ip6_map_icmp_relay_node; + +typedef struct { + u32 map_domain_index; + u16 port; + u8 cached; +} map_ip6_map_ip4_reass_trace_t; + +u8 * +format_ip6_map_ip4_reass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + map_ip6_map_ip4_reass_trace_t *t = va_arg (*args, map_ip6_map_ip4_reass_trace_t *); + return format(s, "MAP domain index: %d L4 port: %u Status: %s", t->map_domain_index, + t->port, t->cached?"cached":"forwarded"); +} + +typedef struct { + u16 offset; + u16 frag_len; + u8 out; +} map_ip6_map_ip6_reass_trace_t; + +u8 * +format_ip6_map_ip6_reass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + map_ip6_map_ip6_reass_trace_t *t = va_arg (*args, map_ip6_map_ip6_reass_trace_t *); + return format(s, "Offset: %d Fragment length: %d Status: %s", t->offset, t->frag_len, t->out?"out":"in"); +} + +/* + * ip6_map_sec_check + */ +static_always_inline bool +ip6_map_sec_check (map_domain_t *d, u16 port, ip4_header_t *ip4, ip6_header_t *ip6) +{ + u16 sp4 = clib_net_to_host_u16(port); + u32 sa4 = clib_net_to_host_u32(ip4->src_address.as_u32); + u64 sal6 = map_get_pfx(d, sa4, sp4); + u64 sar6 = map_get_sfx(d, sa4, sp4); + + if (PREDICT_FALSE(sal6 != clib_net_to_host_u64(ip6->src_address.as_u64[0]) || + sar6 != clib_net_to_host_u64(ip6->src_address.as_u64[1]))) + return (false); + return (true); +} + +static_always_inline void +ip6_map_security_check (map_domain_t *d, ip4_header_t *ip4, ip6_header_t *ip6, u32 *next, u8 *error) +{ + map_main_t *mm = &map_main; + if (d->ea_bits_len || d->rules) { + if (d->psid_length > 0) { + if (!ip4_is_fragment(ip4)) { + u16 port = ip4_map_get_port(ip4, MAP_SENDER); + if (port) { + if (mm->sec_check) + *error = ip6_map_sec_check(d, port, ip4, ip6) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK; + } else { + *error = MAP_ERROR_BAD_PROTOCOL; + } + } else { + *next = mm->sec_check_frag ? 
IP6_MAP_NEXT_IP4_REASS : *next; + } + } + } +} + +static_always_inline bool +ip6_map_ip4_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip) +{ +#ifdef MAP_SKIP_IP6_LOOKUP + map_main_t *mm = &map_main; + u32 adj_index0 = mm->adj4_index; + if (adj_index0 > 0) { + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm4, mm->adj4_index); + if (adj->n_adj > 1) { + u32 hash_c0 = ip4_compute_flow_hash(ip, IP_FLOW_HASH_DEFAULT); + adj_index0 += (hash_c0 & (adj->n_adj - 1)); + } + vnet_buffer(p0)->ip.adj_index[VLIB_TX] = adj_index0; + return (true); + } +#endif + return (false); +} + + +/* + * ip6_map + */ +static uword +ip6_map (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_node.index); + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Dual loop */ + while (n_left_from > 4 && n_left_to_next > 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + u8 error0 = MAP_ERROR_NONE; + u8 error1 = MAP_ERROR_NONE; + map_domain_t *d0 = 0, *d1 = 0; + ip4_header_t *ip40, *ip41; + ip6_header_t *ip60, *ip61; + u16 port0 = 0, port1 = 0; + u32 map_domain_index0 = ~0, map_domain_index1 = ~0; + u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP; + u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer(vm, from[2]); + p3 = vlib_get_buffer(vm, from[3]); + + vlib_prefetch_buffer_header(p2, LOAD); + vlib_prefetch_buffer_header(p3, LOAD); + + /* IPv6 + IPv4 header + 8 bytes of ULP */ + CLIB_PREFETCH(p2->data, 68, LOAD); + CLIB_PREFETCH(p3->data, 68, LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next +=2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip60 = vlib_buffer_get_current(p0); + ip61 = vlib_buffer_get_current(p1); + vlib_buffer_advance(p0, sizeof(ip6_header_t)); + vlib_buffer_advance(p1, sizeof(ip6_header_t)); + ip40 = vlib_buffer_get_current(p0); + ip41 = vlib_buffer_get_current(p1); + + /* + * Encapsulated IPv4 packet + * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled + * - Lookup/Rewrite or Fragment node in case of packet > MTU + * Fragmented IPv6 packet + * ICMP IPv6 packet + * - Error -> Pass to ICMPv6/ICMPv4 relay + * - Info -> Pass to IPv6 local + * Anything else -> drop + */ + if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) { + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32, + &map_domain_index0, &error0); + } else if (ip60->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) { + icmp46_header_t *icmp = (void *)(ip60 + 1); + next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ? 
+ IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY; + } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { + next0 = IP6_MAP_NEXT_IP6_REASS; + } else { + error0 = MAP_ERROR_BAD_PROTOCOL; + next0 = IP6_MAP_NEXT_DROP; + } + if (PREDICT_TRUE(ip61->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip61->payload_length) > 20)) { + d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip41->src_address.as_u32, + &map_domain_index1, &error1); + } else if (ip61->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16(ip61->payload_length) > sizeof(icmp46_header_t)) { + icmp46_header_t *icmp = (void *)(ip61 + 1); + next1 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ? + IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY; + } else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { + next1 = IP6_MAP_NEXT_IP6_REASS; + } else { + error1 = MAP_ERROR_BAD_PROTOCOL; + next1 = IP6_MAP_NEXT_DROP; + } + + if (d0) { + /* MAP inbound security check */ + ip6_map_security_check(d0, ip40, ip60, &next0, &error0); + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && + next0 == IP6_MAP_NEXT_IP4_LOOKUP)) { + if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.flags = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_NEXT_IP4_FRAGMENT; + } else { + next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? IP6_MAP_NEXT_IP4_REWRITE : next0; + } + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip40->length)); + } + } + if (d1) { + /* MAP inbound security check */ + ip6_map_security_check(d1, ip41, ip61, &next1, &error1); + + if (PREDICT_TRUE(error1 == MAP_ERROR_NONE && + next1 == IP6_MAP_NEXT_IP4_LOOKUP)) { + if (PREDICT_FALSE(d1->mtu && (clib_host_to_net_u16(ip41->length) > d1->mtu))) { + vnet_buffer(p1)->ip_frag.header_offset = 0; + vnet_buffer(p1)->ip_frag.flags = 0; + vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer(p1)->ip_frag.mtu = d1->mtu; + next1 = IP6_MAP_NEXT_IP4_FRAGMENT; + } else { + next1 = ip6_map_ip4_lookup_bypass(p1, ip41) ? 
IP6_MAP_NEXT_IP4_REWRITE : next1; + } + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index1, 1, + clib_net_to_host_u16(ip41->length)); + } + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + + if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p1, sizeof(*tr)); + tr->map_domain_index = map_domain_index1; + tr->port = port1; + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); + } + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + map_domain_t *d0 = 0; + ip4_header_t *ip40; + ip6_header_t *ip60; + i32 port0 = 0; + u32 map_domain_index0 = ~0; + u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + vlib_buffer_advance(p0, sizeof(ip6_header_t)); + ip40 = vlib_buffer_get_current(p0); + + /* + * Encapsulated IPv4 packet + * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled + * - Lookup/Rewrite or Fragment node in case of packet > MTU + * Fragmented IPv6 packet + * ICMP IPv6 packet + * - Error -> Pass to ICMPv6/ICMPv4 relay + * - Info -> Pass to IPv6 local + * Anything else -> drop + */ + if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) { + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32, + &map_domain_index0, &error0); + } else if (ip60->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) { + icmp46_header_t *icmp = (void *)(ip60 + 1); + next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ? + IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY; + } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION && + (((ip6_frag_hdr_t *)(ip60+1))->next_hdr == IP_PROTOCOL_IP_IN_IP)) { + next0 = IP6_MAP_NEXT_IP6_REASS; + } else { + error0 = MAP_ERROR_BAD_PROTOCOL; + } + + if (d0) { + /* MAP inbound security check */ + ip6_map_security_check(d0, ip40, ip60, &next0, &error0); + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && + next0 == IP6_MAP_NEXT_IP4_LOOKUP)) { + if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.flags = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_NEXT_IP4_FRAGMENT; + } else { + next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? IP6_MAP_NEXT_IP4_REWRITE : next0; + } + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip40->length)); + } + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = (u16)port0; + } + + next0 = (error0 == MAP_ERROR_NONE) ? 
next0 : IP6_MAP_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +static_always_inline void +ip6_map_ip6_reass_prepare(vlib_main_t *vm, vlib_node_runtime_t *node, map_ip6_reass_t *r, + u32 **fragments_ready, u32 **fragments_to_drop) +{ + ip4_header_t *ip40; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + vlib_buffer_t *p0; + + if(!r->ip4_header.ip_version_and_header_length) + return; + + //The IP header is here, we need to check for packets + //that can be forwarded + int i; + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) { + if (r->fragments[i].pi == ~0 || + ((!r->fragments[i].next_data_len) && (r->fragments[i].next_data_offset != (0xffff)))) + continue; + + p0 = vlib_get_buffer(vm, r->fragments[i].pi); + ip60 = vlib_buffer_get_current(p0); + frag0 = (ip6_frag_hdr_t *)(ip60 + 1); + ip40 = (ip4_header_t *)(frag0 + 1); + + if (ip6_frag_hdr_offset(frag0)) { + //Not first fragment, add the IPv4 header + memcpy(ip40, &r->ip4_header, 20); + } + +#ifdef MAP_IP6_REASS_COUNT_BYTES + r->forwarded += clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0); +#endif + + if (ip6_frag_hdr_more(frag0)) { + //Not last fragment, we copy end of next + memcpy(u8_ptr_add(ip60, p0->current_length), r->fragments[i].next_data, 20); + p0->current_length += 20; + ip60->payload_length = u16_net_add(ip60->payload_length, 20); + } + + if (!ip4_is_fragment(ip40)) { + ip40->fragment_id = frag_id_6to4(frag0->identification); + ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip6_frag_hdr_offset(frag0)); + } else { + ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip4_get_fragment_offset(ip40) + ip6_frag_hdr_offset(frag0)); + } + + if (ip6_frag_hdr_more(frag0)) + ip40->flags_and_fragment_offset |= clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + + ip40->length = clib_host_to_net_u16(p0->current_length - sizeof(*ip60) - sizeof(*frag0)); + ip40->checksum = ip4_header_checksum(ip40); + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->offset = ip4_get_fragment_offset(ip40); + tr->frag_len = clib_net_to_host_u16(ip40->length) - sizeof(*ip40); + tr->out = 1; + } + + vec_add1(*fragments_ready, r->fragments[i].pi); + r->fragments[i].pi = ~0; + r->fragments[i].next_data_len = 0; + r->fragments[i].next_data_offset = 0; + map_main.ip6_reass_buffered_counter--; + + //TODO: Best solution would be that ip6_map handles extension headers + // and ignores atomic fragment. But in the meantime, let's just copy the header. 
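+ //In practice this means: save next_hdr from the fragment header, slide the
+ //40-byte IPv6 header forward over the 8-byte fragment header with memmove(),
+ //set the moved header's protocol field to the saved value, and advance the
+ //buffer by sizeof(*frag0), leaving [IPv6 header][rebuilt IPv4 header][payload].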
+ + u8 protocol = frag0->next_hdr; + memmove(u8_ptr_add(ip40, - sizeof(*ip60)), ip60, sizeof(*ip60)); + ((ip6_header_t *)u8_ptr_add(ip40, - sizeof(*ip60)))->protocol = protocol; + vlib_buffer_advance(p0, sizeof(*frag0)); + } +} + +void +map_ip6_drop_pi(u32 pi) +{ + vlib_main_t *vm = vlib_get_main(); + vlib_node_runtime_t *n = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index); + vlib_set_next_frame_buffer(vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi); +} + +void +map_ip4_drop_pi(u32 pi) +{ + vlib_main_t *vm = vlib_get_main(); + vlib_node_runtime_t *n = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index); + vlib_set_next_frame_buffer(vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi); +} + +/* + * ip6_reass + * TODO: We should count the number of successfully + * transmitted fragment bytes and compare that to the last fragment + * offset such that we can free the reassembly structure when all fragments + * have been forwarded. + */ +static uword +ip6_map_ip6_reass (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index); + u32 *fragments_to_drop = NULL; + u32 *fragments_ready = NULL; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + u16 offset; + u16 next_offset; + u16 frag_len; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + frag0 = (ip6_frag_hdr_t *)(ip60 + 1); + offset = clib_host_to_net_u16(frag0->fragment_offset_and_more) & (~7); + frag_len = clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0); + next_offset = ip6_frag_hdr_more(frag0) ? 
(offset + frag_len) : (0xffff); + + //FIXME: Support other extension headers, maybe + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->offset = offset; + tr->frag_len = frag_len; + tr->out = 0; + } + + map_ip6_reass_lock(); + map_ip6_reass_t *r = map_ip6_reass_get(&ip60->src_address, &ip60->dst_address, + frag0->identification, frag0->next_hdr, &fragments_to_drop); + //FIXME: Use better error codes + if (PREDICT_FALSE(!r)) { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } else if (PREDICT_FALSE((frag_len <= 20 && + (ip6_frag_hdr_more(frag0) || (!offset))))) { + //Very small fragment are restricted to the last one and + //can't be the first one + error0 = MAP_ERROR_FRAGMENT_MALFORMED; + } else if (map_ip6_reass_add_fragment(r, pi0, offset, next_offset, (u8 *)(frag0 + 1), frag_len)) { + map_ip6_reass_free(r, &fragments_to_drop); + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } else { +#ifdef MAP_IP6_REASS_COUNT_BYTES + if (!ip6_frag_hdr_more(frag0)) + r->expected_total = offset + frag_len; +#endif + ip6_map_ip6_reass_prepare(vm, node, r, &fragments_ready, &fragments_to_drop); +#ifdef MAP_IP6_REASS_COUNT_BYTES + if(r->forwarded >= r->expected_total) + map_ip6_reass_free(r, &fragments_to_drop); +#endif + } + map_ip6_reass_unlock(); + + if (error0 == MAP_ERROR_NONE) { + if (frag_len > 20) { + //Dequeue the packet + n_left_to_next++; + to_next--; + } else { + //All data from that packet was copied no need to keep it, but this is not an error + p0->error = error_node->errors[MAP_ERROR_NONE]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP); + } + } else { + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP); + } + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + map_send_all_to_node(vm, fragments_ready, node, + &error_node->errors[MAP_ERROR_NONE], + IP6_MAP_IP6_REASS_NEXT_IP6_MAP); + map_send_all_to_node(vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP6_MAP_IP6_REASS_NEXT_DROP); + + vec_free(fragments_to_drop); + vec_free(fragments_ready); + return frame->n_vectors; +} + +/* + * ip6_ip4_virt_reass + */ +static uword +ip6_map_ip4_reass (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index); + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 cpu_index = os_get_cpu_number(); + u32 *fragments_to_drop = NULL; + u32 *fragments_to_loopback = NULL; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + map_domain_t *d0; + ip4_header_t *ip40; + ip6_header_t *ip60; + i32 port0 = 0; + u32 map_domain_index0; + u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP; + u8 cached = 0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip40 = vlib_buffer_get_current(p0); + ip60 = 
((ip6_header_t *)ip40) - 1; + + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32, + &map_domain_index0, &error0); + + map_ip4_reass_lock(); + //This node only deals with fragmented ip4 + map_ip4_reass_t *r = map_ip4_reass_get(ip40->src_address.as_u32, ip40->dst_address.as_u32, + ip40->fragment_id, ip40->protocol, &fragments_to_drop); + if (PREDICT_FALSE(!r)) { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } else if (PREDICT_TRUE(ip4_get_fragment_offset(ip40))) { + // This is a fragment + if (r->port >= 0) { + // We know the port already + port0 = r->port; + } else if (map_ip4_reass_add_fragment(r, pi0)) { + // Not enough space for caching + error0 = MAP_ERROR_FRAGMENT_MEMORY; + map_ip4_reass_free(r, &fragments_to_drop); + } else { + cached = 1; + } + } else if ((port0 = ip4_get_port(ip40, MAP_SENDER, p0->current_length)) < 0) { + // Could not find port from first fragment. Stop reassembling. + error0 = MAP_ERROR_BAD_PROTOCOL; + port0 = 0; + map_ip4_reass_free(r, &fragments_to_drop); + } else { + // Found port. Remember it and loopback saved fragments + r->port = port0; + map_ip4_reass_get_fragments(r, &fragments_to_loopback); + } + +#ifdef MAP_IP4_REASS_COUNT_BYTES + if (!cached && r) { + r->forwarded += clib_host_to_net_u16(ip40->length) - 20; + if (!ip4_get_fragment_more(ip40)) + r->expected_total = ip4_get_fragment_offset(ip40) * 8 + clib_host_to_net_u16(ip40->length) - 20; + if(r->forwarded >= r->expected_total) + map_ip4_reass_free(r, &fragments_to_drop); + } +#endif + + map_ip4_reass_unlock(); + + if(PREDICT_TRUE(error0 == MAP_ERROR_NONE)) + error0 = ip6_map_sec_check(d0, port0, ip40, ip60) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK; + + if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu) && + error0 == MAP_ERROR_NONE && !cached)) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.flags = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_ip6_map_ip4_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + tr->cached = cached; + } + + if (cached) { + //Dequeue the packet + n_left_to_next++; + to_next--; + } else { + if (error0 == MAP_ERROR_NONE) + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip40->length)); + next0 = (error0 == MAP_ERROR_NONE) ? 
next0 : IP6_MAP_IP4_REASS_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + + //Loopback when we reach the end of the inpu vector + if(n_left_from == 0 && vec_len(fragments_to_loopback)) { + from = vlib_frame_vector_args(frame); + u32 len = vec_len(fragments_to_loopback); + if(len <= VLIB_FRAME_SIZE) { + memcpy(from, fragments_to_loopback, sizeof(u32)*len); + n_left_from = len; + vec_reset_length(fragments_to_loopback); + } else { + memcpy(from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof(u32)*VLIB_FRAME_SIZE); + n_left_from = VLIB_FRAME_SIZE; + _vec_len(fragments_to_loopback) = len - VLIB_FRAME_SIZE; + } + } + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + map_send_all_to_node(vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP6_MAP_IP4_REASS_NEXT_DROP); + + vec_free(fragments_to_drop); + vec_free(fragments_to_loopback); + return frame->n_vectors; +} + +/* + * ip6_icmp_relay + */ +static uword +ip6_map_icmp_relay (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_icmp_relay_node.index); + map_main_t *mm = &map_main; + u32 cpu_index = os_get_cpu_number(); + u16 *fragment_ids, *fid; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + /* Get random fragment IDs for replies. */ + fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer, n_left_from * sizeof (fragment_ids[0])); + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + ip6_header_t *ip60; + u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP; + u32 mtu; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + u16 tlen = clib_net_to_host_u16(ip60->payload_length); + + /* + * In: + * IPv6 header (40) + * ICMPv6 header (8) + * IPv6 header (40) + * Original IPv4 header / packet + * Out: + * New IPv4 header + * New ICMP header + * Original IPv4 header / packet + */ + + /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */ + if (tlen < 76) { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + icmp46_header_t *icmp60 = (icmp46_header_t *)(ip60 + 1); + ip6_header_t *inner_ip60 = (ip6_header_t *)(icmp60 + 2); + + if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP) { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + ip4_header_t *inner_ip40 = (ip4_header_t *)(inner_ip60 + 1); + vlib_buffer_advance(p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */ + ip4_header_t *new_ip40 = vlib_buffer_get_current(p0); + icmp46_header_t *new_icmp40 = (icmp46_header_t *)(new_ip40 + 1); + + /* + * Relay according to RFC2473, section 8.3 + */ + switch (icmp60->type) { + case ICMP6_destination_unreachable: + case ICMP6_time_exceeded: + case ICMP6_parameter_problem: + /* Type 3 - destination unreachable, Code 1 - host unreachable */ + new_icmp40->type = ICMP4_destination_unreachable; + new_icmp40->code = ICMP4_destination_unreachable_destination_unreachable_host; + break; + + case ICMP6_packet_too_big: + /* Type 3 - destination unreachable, Code 4 - 
packet too big */ + /* Potential TODO: Adjust domain tunnel MTU based on the value received here */ + mtu = clib_net_to_host_u32(*((u32 *)(icmp60 + 1))); + + /* Check DF flag */ + if (!(inner_ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT))) { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + new_icmp40->type = ICMP4_destination_unreachable; + new_icmp40->code = ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set; + *((u32 *)(new_icmp40 + 1)) = clib_host_to_net_u32(mtu < 1280 ? 1280 : mtu); + break; + + default: + error0 = MAP_ERROR_ICMP_RELAY; + break; + } + + /* + * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812) + */ + new_ip40->ip_version_and_header_length = 0x45; + new_ip40->tos = 0; + u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20; + new_ip40->length = clib_host_to_net_u16(nlen); + new_ip40->fragment_id = fid[0]; fid++; + new_ip40->ttl = 64; + new_ip40->protocol = IP_PROTOCOL_ICMP; + new_ip40->src_address = mm->icmp_src_address; + new_ip40->dst_address = inner_ip40->src_address; + new_ip40->checksum = ip4_header_checksum(new_ip40); + + new_icmp40->checksum = 0; + ip_csum_t sum = ip_incremental_checksum(0, new_icmp40, nlen - 20); + new_icmp40->checksum = ~ip_csum_fold(sum); + + vlib_increment_simple_counter(&mm->icmp_relayed, cpu_index, 0, 1); + + error: + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = 0; + tr->port = 0; + } + + next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; + +} + +static char *map_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip6_map_node) = { + .function = ip6_map, + .name = "ip6-map", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = IP6_MAP_N_NEXT, + .next_nodes = { + [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup", +#ifdef MAP_SKIP_IP6_LOOKUP + [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite-transit", +#endif + [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass", + [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass", + [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay", + [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local", + [IP6_MAP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = { + .function = ip6_map_ip6_reass, + .name = "ip6-map-ip6-reass", + .vector_size = sizeof(u32), + .format_trace = format_ip6_map_ip6_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT, + .next_nodes = { + [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map", + [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = { + .function = ip6_map_ip4_reass, + .name = "ip6-map-ip4-reass", + .vector_size = sizeof(u32), + .format_trace = format_ip6_map_ip4_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT, + .next_nodes = { + [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup", + 
[IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = { + .function = ip6_map_icmp_relay, + .name = "ip6-map-icmp-relay", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, //FIXME + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_ICMP_RELAY_N_NEXT, + .next_nodes = { + [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip6_map_t.c b/vnet/vnet/map/ip6_map_t.c new file mode 100644 index 00000000000..7720e06fba4 --- /dev/null +++ b/vnet/vnet/map/ip6_map_t.c @@ -0,0 +1,1141 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "map.h" + +#include "../ip/ip_frag.h" + +#define IP6_MAP_T_DUAL_LOOP + +typedef enum { + IP6_MAPT_NEXT_MAPT_TCP_UDP, + IP6_MAPT_NEXT_MAPT_ICMP, + IP6_MAPT_NEXT_MAPT_FRAGMENTED, + IP6_MAPT_NEXT_DROP, + IP6_MAPT_N_NEXT +} ip6_mapt_next_t; + +typedef enum { + IP6_MAPT_ICMP_NEXT_IP4_LOOKUP, + IP6_MAPT_ICMP_NEXT_IP4_FRAG, + IP6_MAPT_ICMP_NEXT_DROP, + IP6_MAPT_ICMP_N_NEXT +} ip6_mapt_icmp_next_t; + +typedef enum { + IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP, + IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG, + IP6_MAPT_TCP_UDP_NEXT_DROP, + IP6_MAPT_TCP_UDP_N_NEXT +} ip6_mapt_tcp_udp_next_t; + +typedef enum { + IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP, + IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG, + IP6_MAPT_FRAGMENTED_NEXT_DROP, + IP6_MAPT_FRAGMENTED_N_NEXT +} ip6_mapt_fragmented_next_t; + +static_always_inline int +ip6_map_fragment_cache (ip6_header_t *ip6, ip6_frag_hdr_t *frag, map_domain_t *d, u16 port) +{ + u32 *ignore = NULL; + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(map_get_ip4(&ip6->src_address), ip6_map_t_embedded_address(d, &ip6->dst_address), + frag_id_6to4(frag->identification), + (ip6->protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : ip6->protocol, + &ignore); + if (r) + r->port = port; + + map_ip4_reass_unlock(); + return !r; +} + +/* Returns the associated port or -1 */ +static_always_inline i32 +ip6_map_fragment_get(ip6_header_t *ip6, ip6_frag_hdr_t *frag, map_domain_t *d) +{ + u32 *ignore = NULL; + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(map_get_ip4(&ip6->src_address), ip6_map_t_embedded_address(d, &ip6->dst_address), + frag_id_6to4(frag->identification), + (ip6->protocol == IP_PROTOCOL_ICMP6) ? 
IP_PROTOCOL_ICMP : ip6->protocol, + &ignore); + i32 ret = r?r->port:-1; + map_ip4_reass_unlock(); + return ret; +} + +static_always_inline u8 +ip6_translate_tos(const ip6_header_t *ip6) +{ +#ifdef IP6_MAP_T_OVERRIDE_TOS + return IP6_MAP_T_OVERRIDE_TOS; +#else + return (clib_net_to_host_u32(ip6->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >> 20; +#endif +} + +//TODO: Find right place in memory for that +static u8 icmp6_to_icmp_updater_pointer_table[] = + { 0, 1,~0,~0, + 2, 2, 9, 8, + 12,12,12,12, + 12,12,12,12, + 12,12,12,12, + 12,12,12,12, + 24,24,24,24, + 24,24,24,24, + 24,24,24,24, + 24,24,24,24 + }; + +static_always_inline int +ip6_icmp_to_icmp6_in_place (icmp46_header_t *icmp, u32 icmp_len, + i32 *sender_port, ip6_header_t **inner_ip6) +{ + *inner_ip6 = NULL; + switch (icmp->type) { + case ICMP6_echo_request: + *sender_port = ((u16 *)icmp)[2]; + icmp->type = ICMP4_echo_request; + break; + case ICMP6_echo_reply: + *sender_port = ((u16 *)icmp)[2]; + icmp->type = ICMP4_echo_reply; + break; + case ICMP6_destination_unreachable: + *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8); + *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len); + + switch (icmp->code) { + case ICMP6_destination_unreachable_no_route_to_destination: //0 + case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2 + case ICMP6_destination_unreachable_address_unreachable: //3 + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_destination_unreachable_host; + break; + case ICMP6_destination_unreachable_destination_administratively_prohibited: //1 + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_communication_administratively_prohibited; + break; + case ICMP6_destination_unreachable_port_unreachable: + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_port_unreachable; + break; + default: + return -1; + } + break; + case ICMP6_packet_too_big: + *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8); + *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len); + + icmp->type = ICMP4_destination_unreachable; + icmp->code = 4; + { + u32 advertised_mtu = clib_net_to_host_u32(*((u32 *)(icmp + 1))); + advertised_mtu -= 20; + //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20) + ((u16 *)(icmp))[3] = clib_host_to_net_u16(advertised_mtu); + } + break; + + case ICMP6_time_exceeded: + *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8); + *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len); + + icmp->type = ICMP4_time_exceeded; + break; + + case ICMP6_parameter_problem: + *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8); + *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len); + + switch (icmp->code) { + case ICMP6_parameter_problem_erroneous_header_field: + icmp->type = ICMP4_parameter_problem; + icmp->code = ICMP4_parameter_problem_pointer_indicates_error; + u32 pointer = clib_net_to_host_u32(*((u32*)(icmp + 1))); + if (pointer >= 40) + return -1; + + ((u8*)(icmp + 1))[0] = icmp6_to_icmp_updater_pointer_table[pointer]; + break; + case ICMP6_parameter_problem_unrecognized_next_header: + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_port_unreachable; + break; + case ICMP6_parameter_problem_unrecognized_option: + default: + return -1; + } + break; + default: + return -1; + break; + } + return 0; +} + +static_always_inline void +_ip6_map_t_icmp (map_domain_t *d, vlib_buffer_t *p, u8 
*error) +{ + ip6_header_t *ip6, *inner_ip6; + ip4_header_t *ip4, *inner_ip4; + u32 ip6_pay_len; + icmp46_header_t *icmp; + i32 sender_port; + ip_csum_t csum; + u32 ip4_sadr, inner_ip4_dadr; + + ip6 = vlib_buffer_get_current(p); + ip6_pay_len = clib_net_to_host_u16(ip6->payload_length); + icmp = (icmp46_header_t *)(ip6 + 1); + ASSERT(ip6_pay_len + sizeof(*ip6) <= p->current_length); + + if (ip6->protocol != IP_PROTOCOL_ICMP6) { + //No extensions headers allowed here + //TODO: SR header + *error = MAP_ERROR_MALFORMED; + return; + } + + //There are no fragmented ICMP messages, so no extension header for now + + if (ip6_icmp_to_icmp6_in_place(icmp, ip6_pay_len, &sender_port, &inner_ip6)) { + //TODO: In case of 1:1 mapping it is not necessary to have the sender port + *error = MAP_ERROR_ICMP; + return; + } + + if (sender_port < 0) { + // In case of 1:1 mapping, we don't care about the port + if(d->ea_bits_len == 0 && d->rules) { + sender_port = 0; + } else { + *error = MAP_ERROR_ICMP; + return; + } + } + + //Security check + //Note that this prevents an intermediate IPv6 router from answering the request + ip4_sadr = map_get_ip4(&ip6->src_address); + if (ip6->src_address.as_u64[0] != map_get_pfx_net(d, ip4_sadr, sender_port) || + ip6->src_address.as_u64[1] != map_get_sfx_net(d, ip4_sadr, sender_port)) { + *error = MAP_ERROR_SEC_CHECK; + return; + } + + if (inner_ip6) { + u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset, inner_frag_id; + u8 *inner_l4, inner_protocol; + + //We have two headers to translate + // FROM + // [ IPv6 ]<- ext ->[IC][ IPv6 ]<- ext ->[L4 header ... + // Handled cases: + // [ IPv6 ][IC][ IPv6 ][L4 header ... + // [ IPv6 ][IC][ IPv6 ][Fr][L4 header ... + // TO + // [ IPv4][IC][ IPv4][L4 header ... + + //TODO: This was already done deep in ip6_icmp_to_icmp6_in_place + //We shouldn't have to do it again + if (ip6_parse(inner_ip6, ip6_pay_len - 8, + &inner_protocol, &inner_l4_offset, &inner_frag_offset)) { + *error = MAP_ERROR_MALFORMED; + return; + } + + inner_l4 = u8_ptr_add(inner_ip6, inner_l4_offset); + inner_ip4 = (ip4_header_t *) u8_ptr_add(inner_l4, - sizeof(*inner_ip4)); + if (inner_frag_offset) { + ip6_frag_hdr_t *inner_frag = (ip6_frag_hdr_t *) u8_ptr_add(inner_ip6, inner_frag_offset); + inner_frag_id = frag_id_6to4(inner_frag->identification); + } else { + inner_frag_id = 0; + } + + //Do the translation of the inner packet + if (inner_protocol == IP_PROTOCOL_TCP) { + inner_L4_checksum = (u16 *) u8_ptr_add(inner_l4, 16); + } else if (inner_protocol == IP_PROTOCOL_UDP) { + inner_L4_checksum = (u16 *) u8_ptr_add(inner_l4, 6); + } else if (inner_protocol == IP_PROTOCOL_ICMP6) { + icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4; + csum = inner_icmp->checksum; + csum = ip_csum_sub_even(csum, *((u16 *)inner_icmp)); + //It cannot be of a different type as ip6_icmp_to_icmp6_in_place succeeded + inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ? 
+ ICMP4_echo_request : ICMP4_echo_reply; + csum = ip_csum_add_even(csum, *((u16 *)inner_icmp)); + inner_icmp->checksum = ip_csum_fold(csum); + inner_protocol = IP_PROTOCOL_ICMP; //Will be copied to ip6 later + inner_L4_checksum = &inner_icmp->checksum; + } else { + *error = MAP_ERROR_BAD_PROTOCOL; + return; + } + + csum = *inner_L4_checksum; + csum = ip_csum_sub_even(csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even(csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even(csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even(csum, inner_ip6->dst_address.as_u64[1]); + + //Sanity check of the outer destination address + if (ip6->dst_address.as_u64[0] != inner_ip6->src_address.as_u64[0] && + ip6->dst_address.as_u64[1] != inner_ip6->src_address.as_u64[1]) { + *error = MAP_ERROR_SEC_CHECK; + return; + } + + //Security check of inner packet + inner_ip4_dadr = map_get_ip4(&inner_ip6->dst_address); + if (inner_ip6->dst_address.as_u64[0] != map_get_pfx_net(d, inner_ip4_dadr, sender_port) || + inner_ip6->dst_address.as_u64[1] != map_get_sfx_net(d, inner_ip4_dadr, sender_port)) { + *error = MAP_ERROR_SEC_CHECK; + return; + } + + inner_ip4->dst_address.as_u32 = inner_ip4_dadr; + inner_ip4->src_address.as_u32 = ip6_map_t_embedded_address(d, &inner_ip6->src_address); + inner_ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + inner_ip4->tos = ip6_translate_tos(inner_ip6); + inner_ip4->length = u16_net_add(inner_ip6->payload_length, sizeof(*ip4) + sizeof(*ip6) - + inner_l4_offset); + inner_ip4->fragment_id = inner_frag_id; + inner_ip4->flags_and_fragment_offset = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + inner_ip4->ttl = inner_ip6->hop_limit; + inner_ip4->protocol = inner_protocol; + inner_ip4->checksum = ip4_header_checksum(inner_ip4); + + if (inner_ip4->protocol == IP_PROTOCOL_ICMP) { + //Remove remainings of the pseudo-header in the csum + csum = ip_csum_sub_even(csum, clib_host_to_net_u16(IP_PROTOCOL_ICMP6)); + csum = ip_csum_sub_even(csum, inner_ip4->length - sizeof(*inner_ip4)); + } else { + //Update to new pseudo-header + csum = ip_csum_add_even(csum, inner_ip4->src_address.as_u32); + csum = ip_csum_add_even(csum, inner_ip4->dst_address.as_u32); + } + *inner_L4_checksum = ip_csum_fold(csum); + + //Move up icmp header + ip4 = (ip4_header_t *) u8_ptr_add(inner_l4, - 2 * sizeof(*ip4) - 8); + memcpy(u8_ptr_add(inner_l4, - sizeof(*ip4) - 8), icmp, 8); + icmp = (icmp46_header_t *) u8_ptr_add(inner_l4, - sizeof(*ip4) - 8); + } else { + //Only one header to translate + ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)); + } + vlib_buffer_advance(p, (u32) (((u8 *)ip4) - ((u8 *)ip6))); + + ip4->dst_address.as_u32 = ip6_map_t_embedded_address(d, &ip6->dst_address); + ip4->src_address.as_u32 = ip4_sadr; + ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos(ip6); + ip4->fragment_id = 0; + ip4->flags_and_fragment_offset = 0; + ip4->ttl = ip6->hop_limit; + ip4->protocol = IP_PROTOCOL_ICMP; + //TODO fix the length depending on offset length + ip4->length = u16_net_add(ip6->payload_length, + (inner_ip6 == NULL)?sizeof(*ip4):(2*sizeof(*ip4) - sizeof(*ip6))); + ip4->checksum = ip4_header_checksum(ip4); + + //TODO: We could do an easy diff-checksum for echo requests/replies + //Recompute ICMP checksum + icmp->checksum = 0; + csum = ip_incremental_checksum(0, icmp, clib_net_to_host_u16(ip4->length) - sizeof(*ip4)); + icmp->checksum = ~ip_csum_fold (csum); +} + 
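+/*
+ * A note on checksum handling in the MAP-T nodes below: the ICMP path above
+ * recomputes the ICMPv4 checksum from scratch, since ICMPv4 (unlike ICMPv6)
+ * has no pseudo-header, while the TCP/UDP path patches the existing L4
+ * checksum incrementally (RFC 1624): subtract the IPv6 pseudo-header address
+ * words, then add the translated IPv4 addresses. A minimal sketch of that
+ * update, as an illustrative helper only (the nodes inline the same
+ * ip_csum_* calls directly):
+ */
+static_always_inline u16
+ip6_map_t_l4_csum_update_sketch (u16 old_csum, ip6_header_t *ip6,
+                                 u32 v4_saddr, u32 v4_daddr)
+{
+  ip_csum_t c = old_csum;
+  /* Remove the IPv6 source/destination contribution to the pseudo-header. */
+  c = ip_csum_sub_even (c, ip6->src_address.as_u64[0]);
+  c = ip_csum_sub_even (c, ip6->src_address.as_u64[1]);
+  c = ip_csum_sub_even (c, ip6->dst_address.as_u64[0]);
+  c = ip_csum_sub_even (c, ip6->dst_address.as_u64[1]);
+  /* Add the translated IPv4 source/destination. */
+  c = ip_csum_add_even (c, v4_saddr);
+  c = ip_csum_add_even (c, v4_daddr);
+  return ip_csum_fold (c);
+}
+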
+static uword +ip6_map_t_icmp (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_map_t_icmp_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0; + ip6_mapt_icmp_next_t next0; + map_domain_t *d0; + u16 len0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + next0 = IP6_MAPT_ICMP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer(vm, pi0); + len0 = clib_net_to_host_u16(((ip6_header_t *)vlib_buffer_get_current(p0))->payload_length); + d0 = pool_elt_at_index(map_main.domains, vnet_buffer(p0)->map_t.map_domain_index); + _ip6_map_t_icmp(d0, p0, &error0); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + //Send to fragmentation node if necessary + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG; + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + len0); + } else { + next0 = IP6_MAPT_ICMP_NEXT_DROP; + } + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static uword +ip6_map_t_fragmented (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while(n_left_from >= 4 && n_left_to_next >= 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip60, *ip61; + ip6_frag_hdr_t *frag0, *frag1; + ip4_header_t *ip40, *ip41; + u16 frag_id0, frag_offset0, + frag_id1, frag_offset1; + u8 frag_more0, frag_more1; + ip6_mapt_fragmented_next_t next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip60 = vlib_buffer_get_current(p0); + ip61 = vlib_buffer_get_current(p1); + frag0 = (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + frag1 = (ip6_frag_hdr_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset); + ip40 = (ip4_header_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + ip41 = (ip4_header_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p1, vnet_buffer(p1)->map_t.v6.l4_offset - 
sizeof(*ip40)); + + frag_id0 = frag_id_6to4(frag0->identification); + frag_id1 = frag_id_6to4(frag1->identification); + frag_more0 = ip6_frag_hdr_more(frag0); + frag_more1 = ip6_frag_hdr_more(frag1); + frag_offset0 = ip6_frag_hdr_offset(frag0); + frag_offset1 = ip6_frag_hdr_offset(frag1); + + ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr; + ip41->dst_address.as_u32 = vnet_buffer(p1)->map_t.v6.daddr; + ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr; + ip41->src_address.as_u32 = vnet_buffer(p1)->map_t.v6.saddr; + ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip41->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip40->tos = ip6_translate_tos(ip60); + ip41->tos = ip6_translate_tos(ip61); + ip40->length = u16_net_add(ip60->payload_length, + sizeof(*ip40) - vnet_buffer(p0)->map_t.v6.l4_offset + sizeof(*ip60)); + ip41->length = u16_net_add(ip61->payload_length, + sizeof(*ip40) - vnet_buffer(p1)->map_t.v6.l4_offset + sizeof(*ip60)); + ip40->fragment_id = frag_id0; + ip41->fragment_id = frag_id1; + ip40->flags_and_fragment_offset = + clib_host_to_net_u16(frag_offset0 | (frag_more0?IP4_HEADER_FLAG_MORE_FRAGMENTS:0)); + ip41->flags_and_fragment_offset = + clib_host_to_net_u16(frag_offset1 | (frag_more1?IP4_HEADER_FLAG_MORE_FRAGMENTS:0)); + ip40->ttl = ip60->hop_limit; + ip41->ttl = ip61->hop_limit; + ip40->protocol = (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)? + IP_PROTOCOL_ICMP:vnet_buffer(p0)->map_t.v6.l4_protocol; + ip41->protocol = (vnet_buffer(p1)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)? + IP_PROTOCOL_ICMP:vnet_buffer(p1)->map_t.v6.l4_protocol; + ip40->checksum = ip4_header_checksum(ip40); + ip41->checksum = ip4_header_checksum(ip41); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + + if(vnet_buffer(p1)->map_t.mtu < p1->current_length) { + vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu; + vnet_buffer(p1)->ip_frag.header_offset = 0; + vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, + to_next, n_left_to_next, pi0, pi1, + next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + ip4_header_t *ip40; + u16 frag_id0; + u8 frag_more0; + u16 frag_offset0; + ip6_mapt_fragmented_next_t next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + frag0 = (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + ip40 = (ip4_header_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + + frag_id0 = frag_id_6to4(frag0->identification); + frag_more0 = ip6_frag_hdr_more(frag0); + frag_offset0 = ip6_frag_hdr_offset(frag0); + + ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr; + ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr; + ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip40->tos = 
ip6_translate_tos(ip60); + ip40->length = u16_net_add(ip60->payload_length, + sizeof(*ip40) - vnet_buffer(p0)->map_t.v6.l4_offset + sizeof(*ip60)); + ip40->fragment_id = frag_id0; + ip40->flags_and_fragment_offset = + clib_host_to_net_u16(frag_offset0 | (frag_more0?IP4_HEADER_FLAG_MORE_FRAGMENTS:0)); + ip40->ttl = ip60->hop_limit; + ip40->protocol = (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)? + IP_PROTOCOL_ICMP:vnet_buffer(p0)->map_t.v6.l4_protocol; + ip40->checksum = ip4_header_checksum(ip40); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + //Send to fragmentation node if necessary + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static uword +ip6_map_t_tcp_udp (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while(n_left_from >= 4 && n_left_to_next >= 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip60, *ip61; + ip_csum_t csum0, csum1; + ip4_header_t *ip40, *ip41; + u16 fragment_id0, flags0, *checksum0, + fragment_id1, flags1, *checksum1; + ip6_mapt_tcp_udp_next_t next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip60 = vlib_buffer_get_current(p0); + ip61 = vlib_buffer_get_current(p1); + ip40 = (ip4_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + ip41 = (ip4_header_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p1, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40)); + checksum0 = (u16 *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.checksum_offset); + checksum1 = (u16 *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.checksum_offset); + + csum0 = ip_csum_sub_even(*checksum0, ip60->src_address.as_u64[0]); + csum1 = ip_csum_sub_even(*checksum1, ip61->src_address.as_u64[0]); + csum0 = ip_csum_sub_even(csum0, ip60->src_address.as_u64[1]); + csum1 = ip_csum_sub_even(csum1, ip61->src_address.as_u64[1]); + csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[0]); + csum1 = ip_csum_sub_even(csum0, ip61->dst_address.as_u64[0]); + csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[1]); + csum1 = ip_csum_sub_even(csum1, ip61->dst_address.as_u64[1]); + csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.daddr); + csum1 = ip_csum_add_even(csum1, vnet_buffer(p1)->map_t.v6.daddr); + csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.saddr); + csum1 = ip_csum_add_even(csum1, vnet_buffer(p1)->map_t.v6.saddr); + *checksum0 = ip_csum_fold(csum0); + *checksum1 = ip_csum_fold(csum1); + + if 
(PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset)) { + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + fragment_id0 = frag_id_6to4(hdr->identification); + flags0 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + } else { + fragment_id0 = 0; + flags0 = 0; + } + + if (PREDICT_FALSE(vnet_buffer(p1)->map_t.v6.frag_offset)) { + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset); + fragment_id1 = frag_id_6to4(hdr->identification); + flags1 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + } else { + fragment_id1 = 0; + flags1 = 0; + } + + ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr; + ip41->dst_address.as_u32 = vnet_buffer(p1)->map_t.v6.daddr; + ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr; + ip41->src_address.as_u32 = vnet_buffer(p1)->map_t.v6.saddr; + ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip41->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip40->tos = ip6_translate_tos(ip60); + ip41->tos = ip6_translate_tos(ip61); + ip40->length = u16_net_add(ip60->payload_length, + sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset); + ip41->length = u16_net_add(ip61->payload_length, + sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p1)->map_t.v6.l4_offset); + ip40->fragment_id = fragment_id0; + ip41->fragment_id = fragment_id1; + ip40->flags_and_fragment_offset = flags0; + ip41->flags_and_fragment_offset = flags1; + ip40->ttl = ip60->hop_limit; + ip41->ttl = ip61->hop_limit; + ip40->protocol = vnet_buffer(p0)->map_t.v6.l4_protocol; + ip41->protocol = vnet_buffer(p1)->map_t.v6.l4_protocol; + ip40->checksum = ip4_header_checksum(ip40); + ip41->checksum = ip4_header_checksum(ip41); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + + if(vnet_buffer(p1)->map_t.mtu < p1->current_length) { + vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu; + vnet_buffer(p1)->ip_frag.header_offset = 0; + vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip6_header_t *ip60; + u16 *checksum0; + ip_csum_t csum0; + ip4_header_t *ip40; + u16 fragment_id0; + u16 flags0; + ip6_mapt_tcp_udp_next_t next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + ip40 = (ip4_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + checksum0 = (u16 *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.checksum_offset); + + //TODO: This can probably be optimized + csum0 = ip_csum_sub_even(*checksum0, ip60->src_address.as_u64[0]); + csum0 = ip_csum_sub_even(csum0, ip60->src_address.as_u64[1]); + csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[0]); + csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[1]); + csum0 = ip_csum_add_even(csum0, 
vnet_buffer(p0)->map_t.v6.daddr); + csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.saddr); + *checksum0 = ip_csum_fold(csum0); + + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset)) { + //Only the first fragment + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + fragment_id0 = frag_id_6to4(hdr->identification); + flags0 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + } else { + fragment_id0 = 0; + flags0 = 0; + } + + ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr; + ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr; + ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip40->tos = ip6_translate_tos(ip60); + ip40->length = u16_net_add(ip60->payload_length, + sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset); + ip40->fragment_id = fragment_id0; + ip40->flags_and_fragment_offset = flags0; + ip40->ttl = ip60->hop_limit; + ip40->protocol = vnet_buffer(p0)->map_t.v6.l4_protocol; + ip40->checksum = ip4_header_checksum(ip40); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + //Send to fragmentation node if necessary + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static_always_inline void +ip6_map_t_classify(vlib_buffer_t *p0, ip6_header_t *ip60, + map_domain_t *d0, i32 *src_port0, + u8 *error0, ip6_mapt_next_t *next0, + u32 l4_len0, ip6_frag_hdr_t *frag0) +{ + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset && + ip6_frag_hdr_offset(frag0))) { + *next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; + if(d0->ea_bits_len == 0 && d0->rules) { + *src_port0 = 0; + } else { + *src_port0 = ip6_map_fragment_get(ip60, frag0, d0); + *error0 = (*src_port0 != -1) ? *error0 : MAP_ERROR_FRAGMENT_DROPPED; + } + } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) { + *error0 = l4_len0 < sizeof(tcp_header_t) ? MAP_ERROR_MALFORMED : *error0; + vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 16; + *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + *src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset)); + } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) { + *error0 = l4_len0 < sizeof(udp_header_t) ? MAP_ERROR_MALFORMED : *error0; + vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 6; + *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + *src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset)); + } else if (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) { + *error0 = l4_len0 < sizeof(icmp46_header_t) ? MAP_ERROR_MALFORMED : *error0; + *next0 = IP6_MAPT_NEXT_MAPT_ICMP; + if(d0->ea_bits_len == 0 && d0->rules) { + *src_port0 = 0; + } else if (((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->code == ICMP6_echo_reply || + ((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->code == ICMP6_echo_request) { + *src_port0 = (i32) *((u16 *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset + 6)); + } + } else { + //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. 
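+ //Neither a non-first fragment nor TCP/UDP/ICMPv6: record MAP_ERROR_BAD_PROTOCOL so the caller steers the buffer to the drop next.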
+ *error0 = MAP_ERROR_BAD_PROTOCOL; + } +} + +static uword +ip6_map_t (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_t_node.index); + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >=2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip60, *ip61; + u8 error0, error1; + ip6_mapt_next_t next0, next1; + u32 l4_len0, l4_len1; + i32 src_port0, src_port1; + map_domain_t *d0, *d1; + ip6_frag_hdr_t *frag0, *frag1; + u32 saddr0, saddr1; + next0 = next1 = 0; //Because compiler whines + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + error0 = MAP_ERROR_NONE; + error1 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip60 = vlib_buffer_get_current(p0); + ip61 = vlib_buffer_get_current(p1); + + saddr0 = map_get_ip4(&ip60->src_address); + saddr1 = map_get_ip4(&ip61->src_address); + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *)&saddr0, + &vnet_buffer(p0)->map_t.map_domain_index, &error0); + d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], + (ip4_address_t *)&saddr1, + &vnet_buffer(p1)->map_t.map_domain_index, &error1); + + vnet_buffer(p0)->map_t.v6.saddr = saddr0; + vnet_buffer(p1)->map_t.v6.saddr = saddr1; + vnet_buffer(p0)->map_t.v6.daddr = ip6_map_t_embedded_address(d0, &ip60->dst_address); + vnet_buffer(p1)->map_t.v6.daddr = ip6_map_t_embedded_address(d1, &ip61->dst_address); + vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + vnet_buffer(p1)->map_t.mtu = d1->mtu ? 
d1->mtu : ~0; + + if (PREDICT_FALSE(ip6_parse(ip60, p0->current_length, + &(vnet_buffer(p0)->map_t.v6.l4_protocol), + &(vnet_buffer(p0)->map_t.v6.l4_offset), + &(vnet_buffer(p0)->map_t.v6.frag_offset)))) { + error0 = MAP_ERROR_MALFORMED; + next0 = IP6_MAPT_NEXT_DROP; + } + + if (PREDICT_FALSE(ip6_parse(ip61, p1->current_length, + &(vnet_buffer(p1)->map_t.v6.l4_protocol), + &(vnet_buffer(p1)->map_t.v6.l4_offset), + &(vnet_buffer(p1)->map_t.v6.frag_offset)))) { + error1 = MAP_ERROR_MALFORMED; + next1 = IP6_MAPT_NEXT_DROP; + } + + src_port0 = src_port1 = -1; + l4_len0 = (u32)clib_net_to_host_u16(ip60->payload_length) + + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset; + l4_len1 = (u32)clib_net_to_host_u16(ip61->payload_length) + + sizeof(*ip60) - vnet_buffer(p1)->map_t.v6.l4_offset; + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + frag1 = (ip6_frag_hdr_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset); + + ip6_map_t_classify(p0, ip60, d0, &src_port0, &error0, &next0, l4_len0, frag0); + ip6_map_t_classify(p1, ip61, d1, &src_port1, &error1, &next1, l4_len1, frag1); + + if (PREDICT_FALSE((src_port0 != -1) && ( + ip60->src_address.as_u64[0] != map_get_pfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0) || + ip60->src_address.as_u64[1] != map_get_sfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0)))) { + error0 = MAP_ERROR_SEC_CHECK; + } + + if (PREDICT_FALSE((src_port1 != -1) && ( + ip61->src_address.as_u64[0] != map_get_pfx_net(d1, vnet_buffer(p1)->map_t.v6.saddr, src_port1) || + ip61->src_address.as_u64[1] != map_get_sfx_net(d1, vnet_buffer(p1)->map_t.v6.saddr, src_port1)))) { + error1 = MAP_ERROR_SEC_CHECK; + } + + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset((ip6_frag_hdr_t *) + u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset))) && + (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) && (error0 == MAP_ERROR_NONE)) { + ip6_map_fragment_cache(ip60, + (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset), + d0, src_port0); + } + + if (PREDICT_FALSE(vnet_buffer(p1)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset((ip6_frag_hdr_t *) + u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset))) && + (src_port1 != -1) && (d1->ea_bits_len != 0 || !d1->rules) && (error1 == MAP_ERROR_NONE)) { + ip6_map_fragment_cache(ip61, + (ip6_frag_hdr_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset), + d1, src_port1); + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip60->payload_length)); + } + + if (PREDICT_TRUE(error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, + vnet_buffer(p1)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip61->payload_length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0; + next1 = (error1 != MAP_ERROR_NONE) ? 
IP6_MAPT_NEXT_DROP : next1; + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip6_header_t *ip60; + u8 error0; + u32 l4_len0; + i32 src_port0; + map_domain_t *d0; + ip6_frag_hdr_t *frag0; + ip6_mapt_next_t next0 = 0; + u32 saddr; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + //Save saddr in a different variable to not overwrite ip.adj_index + saddr = map_get_ip4(&ip60->src_address); + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *)&saddr, + &vnet_buffer(p0)->map_t.map_domain_index, &error0); + + //FIXME: What if d0 is null + vnet_buffer(p0)->map_t.v6.saddr = saddr; + vnet_buffer(p0)->map_t.v6.daddr = ip6_map_t_embedded_address(d0, &ip60->dst_address); + vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + + if (PREDICT_FALSE(ip6_parse(ip60, p0->current_length, + &(vnet_buffer(p0)->map_t.v6.l4_protocol), + &(vnet_buffer(p0)->map_t.v6.l4_offset), + &(vnet_buffer(p0)->map_t.v6.frag_offset)))) { + error0 = MAP_ERROR_MALFORMED; + next0 = IP6_MAPT_NEXT_DROP; + } + + src_port0 = -1; + l4_len0 = (u32)clib_net_to_host_u16(ip60->payload_length) + + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset; + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + + + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset && + ip6_frag_hdr_offset(frag0))) { + src_port0 = ip6_map_fragment_get(ip60, frag0, d0); + error0 = (src_port0 != -1) ? error0 : MAP_ERROR_FRAGMENT_MEMORY; + next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; + } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) { + error0 = l4_len0 < sizeof(tcp_header_t) ? MAP_ERROR_MALFORMED : error0; + vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 16; + next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset)); + } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) { + error0 = l4_len0 < sizeof(udp_header_t) ? MAP_ERROR_MALFORMED : error0; + vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 6; + next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset)); + } else if (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) { + error0 = l4_len0 < sizeof(icmp46_header_t) ? MAP_ERROR_MALFORMED : error0; + next0 = IP6_MAPT_NEXT_MAPT_ICMP; + if (((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->code == ICMP6_echo_reply || + ((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->code == ICMP6_echo_request) + src_port0 = (i32) *((u16 *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset + 6)); + } else { + //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. 
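+ //Same fallback as in ip6_map_t_classify(): unsupported protocols get MAP_ERROR_BAD_PROTOCOL and are sent to IP6_MAPT_NEXT_DROP below.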
+ error0 = MAP_ERROR_BAD_PROTOCOL; + } + + //Security check + if (PREDICT_FALSE((src_port0 != -1) && ( + ip60->src_address.as_u64[0] != map_get_pfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0) || + ip60->src_address.as_u64[1] != map_get_sfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0)))) { + //Security check when src_port0 is not zero (non-first fragment, UDP or TCP) + error0 = MAP_ERROR_SEC_CHECK; + } + + //Fragmented first packet needs to be cached for following packets + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset((ip6_frag_hdr_t *) + u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset))) && + (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) && (error0 == MAP_ERROR_NONE)) { + ip6_map_fragment_cache(ip60, + (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset), + d0, src_port0); + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip60->payload_length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static char *map_t_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = { + .function = ip6_map_t_fragmented, + .name = "ip6-map-t-fragmented", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_FRAGMENTED_N_NEXT, + .next_nodes = { + [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = { + .function = ip6_map_t_icmp, + .name = "ip6-map-t-icmp", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_ICMP_N_NEXT, + .next_nodes = { + [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = { + .function = ip6_map_t_tcp_udp, + .name = "ip6-map-t-tcp-udp", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_TCP_UDP_N_NEXT, + .next_nodes = { + [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_t_node) = { + .function = ip6_map_t, + .name = "ip6-map-t", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_N_NEXT, + .next_nodes = { + [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp", + [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp", + [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = 
"ip6-map-t-fragmented", + [IP6_MAPT_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip6_sixrd.c b/vnet/vnet/map/ip6_sixrd.c new file mode 100644 index 00000000000..0bd0cf3a303 --- /dev/null +++ b/vnet/vnet/map/ip6_sixrd.c @@ -0,0 +1,129 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +/* + * Defines used for testing various optimisation schemes + */ +#define SIXRD_ENCAP_DUAL 0 + +#include "sixrd.h" + +vlib_node_registration_t ip6_sixrd_node; + +typedef enum { + IP6_SIXRD_NEXT_IP4_LOOKUP, + IP6_SIXRD_NEXT_DROP, + IP6_SIXRD_N_NEXT, +} ip6_sixrd_next_t; + +/* + * ip6_sixrd + */ +static uword +ip6_sixrd (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_sixrd_node.index); + u32 encap = 0; + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + sixrd_domain_t *d0; + u8 error0 = SIXRD_ERROR_NONE; + ip6_header_t *ip60; + ip4_header_t *ip4h0; + u32 next0 = IP6_SIXRD_NEXT_IP4_LOOKUP; + u32 sixrd_domain_index0 = ~0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + // p0->current_length = clib_net_to_host_u16(ip40->length); + d0 = ip6_sixrd_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &sixrd_domain_index0); + ASSERT(d0); + + /* SIXRD calc */ + u64 dal60 = clib_net_to_host_u64(ip60->dst_address.as_u64[0]); + u32 da40 = sixrd_get_addr(d0, dal60); + u16 len = clib_net_to_host_u16(ip60->payload_length) + 60; + if (da40 == 0) error0 = SIXRD_ERROR_UNKNOWN; + + /* construct ipv4 header */ + vlib_buffer_advance(p0, - (sizeof(ip4_header_t))); + ip4h0 = vlib_buffer_get_current(p0); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0; + ip4h0->ip_version_and_header_length = 0x45; + ip4h0->tos = 0; + ip4h0->length = clib_host_to_net_u16(len); + ip4h0->fragment_id = 0; + ip4h0->flags_and_fragment_offset = 0; + ip4h0->ttl = 0x40; + ip4h0->protocol = IP_PROTOCOL_IPV6; + ip4h0->src_address = d0->ip4_src; + ip4h0->dst_address.as_u32 = clib_host_to_net_u32(da40); + ip4h0->checksum = ip4_header_checksum(ip4h0); + + next0 = error0 == SIXRD_ERROR_NONE ? 
IP6_SIXRD_NEXT_IP4_LOOKUP : IP6_SIXRD_NEXT_DROP; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + sixrd_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->sixrd_domain_index = sixrd_domain_index0; + } + + p0->error = error_node->errors[error0]; + if (PREDICT_TRUE(error0 == SIXRD_ERROR_NONE)) encap++; + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter(vm, ip6_sixrd_node.index, SIXRD_ERROR_ENCAPSULATED, encap); + + return frame->n_vectors; +} + +static char *sixrd_error_strings[] = { +#define _(sym,string) string, + foreach_sixrd_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip6_sixrd_node) = { + .function = ip6_sixrd, + .name = "ip6-sixrd", + .vector_size = sizeof(u32), + .format_trace = format_sixrd_trace, + .n_errors = SIXRD_N_ERROR, + .error_strings = sixrd_error_strings, + .n_next_nodes = IP6_SIXRD_N_NEXT, + .next_nodes = { + [IP6_SIXRD_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_SIXRD_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/map.c b/vnet/vnet/map/map.c new file mode 100644 index 00000000000..b0cab660876 --- /dev/null +++ b/vnet/vnet/map/map.c @@ -0,0 +1,1634 @@ +/* + * map.c : MAP support + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "map.h" + +/* + * This code supports the following MAP modes: + * + * Algorithmic Shared IPv4 address (ea_bits_len > 0): + * ea_bits_len + ip4_prefix > 32 + * psid_length > 0, ip6_prefix < 64, ip4_prefix <= 32 + * Algorithmic Full IPv4 address (ea_bits_len > 0): + * ea_bits_len + ip4_prefix = 32 + * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32 + * Algorithmic IPv4 prefix (ea_bits_len > 0): + * ea_bits_len + ip4_prefix < 32 + * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32 + * + * Independent Shared IPv4 address (ea_bits_len = 0): + * ip4_prefix = 32 + * psid_length > 0 + * Rule IPv6 address = 128, Rule PSID Set + * Independent Full IPv4 address (ea_bits_len = 0): + * ip4_prefix = 32 + * psid_length = 0, ip6_prefix = 128 + * Independent IPv4 prefix (ea_bits_len = 0): + * ip4_prefix < 32 + * psid_length = 0, ip6_prefix = 128 + * + */ + +/* + * This code supports MAP-T: + * + * With DMR prefix length equal to 96. + * + */ + + +i32 +ip4_get_port (ip4_header_t *ip, map_dir_e dir, u16 buffer_len) +{ + //TODO: use buffer length + if (ip->ip_version_and_header_length != 0x45 || + ip4_get_fragment_offset(ip)) + return -1; + + if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) { + udp_header_t *udp = (void *)(ip + 1); + return (dir == MAP_SENDER) ? 
udp->src_port : udp->dst_port; + } else if (ip->protocol == IP_PROTOCOL_ICMP) { + icmp46_header_t *icmp = (void *)(ip + 1); + if (icmp->type == ICMP4_echo_request || + icmp->type == ICMP4_echo_reply) { + return *((u16 *)(icmp + 1)); + } else if (clib_net_to_host_u16(ip->length) >= 64) { + ip = (ip4_header_t *)(icmp + 2); + if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) { + udp_header_t *udp = (void *)(ip + 1); + return (dir == MAP_SENDER) ? udp->dst_port : udp->src_port; + } else if (ip->protocol == IP_PROTOCOL_ICMP) { + icmp46_header_t *icmp = (void *)(ip + 1); + if (icmp->type == ICMP4_echo_request || + icmp->type == ICMP4_echo_reply) { + return *((u16 *)(icmp + 1)); + } + } + } + } + return -1; +} + +i32 +ip6_get_port (ip6_header_t *ip6, map_dir_e dir, u16 buffer_len) +{ + u8 l4_protocol; + u16 l4_offset; + u16 frag_offset; + u8 *l4; + + if (ip6_parse(ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + //TODO: Use buffer length + + if (frag_offset && + ip6_frag_hdr_offset(((ip6_frag_hdr_t *)u8_ptr_add(ip6, frag_offset)))) + return -1; //Can't deal with non-first fragment for now + + l4 = u8_ptr_add(ip6, l4_offset); + if (l4_protocol == IP_PROTOCOL_TCP || + l4_protocol == IP_PROTOCOL_UDP) { + return (dir == MAP_SENDER) ? ((udp_header_t *)(l4))->src_port : ((udp_header_t *)(l4))->dst_port; + } else if (l4_protocol == IP_PROTOCOL_ICMP6) { + icmp46_header_t *icmp = (icmp46_header_t *)(l4); + if (icmp->type == ICMP6_echo_request) { + return (dir == MAP_SENDER) ? ((u16*)(icmp))[2] : -1; + } else if (icmp->type == ICMP6_echo_reply) { + return (dir == MAP_SENDER) ? -1 : ((u16*)(icmp))[2]; + } + } + return -1; +} + + +int +map_create_domain (ip4_address_t *ip4_prefix, + u8 ip4_prefix_len, + ip6_address_t *ip6_prefix, + u8 ip6_prefix_len, + ip6_address_t *ip6_src, + u8 ip6_src_len, + u8 ea_bits_len, + u8 psid_offset, + u8 psid_length, + u32 *map_domain_index, + u16 mtu, + u8 flags) +{ + map_main_t *mm = &map_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + map_domain_t *d; + ip_adjacency_t adj; + ip4_add_del_route_args_t args4; + ip6_add_del_route_args_t args6; + u8 suffix_len; + uword *p; + + /* EA bits must be within the first 64 bits */ + if (ea_bits_len > 0 && (ip6_prefix_len + ea_bits_len) > 64) + return -1; + + /* Sanity check on the src prefix length */ + if (flags & MAP_DOMAIN_TRANSLATION) { + if (ip6_src_len != 96) { + clib_warning("MAP-T only supports ip6_src_len = 96 for now."); + return -1; + } + } else { + if (ip6_src_len != 128) { + clib_warning("MAP-E requires a BR address, not a prefix (ip6_src_len should be 128)."); + return -1; + } + } + + /* Get domain index */ + pool_get_aligned(mm->domains, d, CLIB_CACHE_LINE_BYTES); + memset(d, 0, sizeof (*d)); + *map_domain_index = d - mm->domains; + + /* Init domain struct */ + d->ip4_prefix.as_u32 = ip4_prefix->as_u32; + d->ip4_prefix_len = ip4_prefix_len; + d->ip6_prefix = *ip6_prefix; + d->ip6_prefix_len = ip6_prefix_len; + d->ip6_src = *ip6_src; + d->ip6_src_len = ip6_src_len; + d->ea_bits_len = ea_bits_len; + d->psid_offset = psid_offset; + d->psid_length = psid_length; + d->mtu = mtu; + d->flags = flags; + + /* How many, and which bits to grab from the IPv4 DA */ + if (ip4_prefix_len + ea_bits_len < 32) { + d->flags |= MAP_DOMAIN_PREFIX; + suffix_len = d->suffix_shift = 32 - ip4_prefix_len - ea_bits_len; + } else { + d->suffix_shift = 0; + suffix_len = 32 - ip4_prefix_len; + } + d->suffix_mask = (1<<suffix_len) - 1; + + d->psid_shift = 16 - 
psid_length - psid_offset; + d->psid_mask = (1 << d->psid_length) - 1; + d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length; + + /* Init IP adjacency */ + memset(&adj, 0, sizeof(adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = (d->flags & MAP_DOMAIN_TRANSLATION) ? IP_LOOKUP_NEXT_MAP_T : IP_LOOKUP_NEXT_MAP; + p = (uword *)&adj.rewrite_data[0]; + *p = (uword) (*map_domain_index); + + if (ip4_get_route(im4, 0, 0, (u8 *)ip4_prefix, ip4_prefix_len)) { + clib_warning("IPv4 route already defined: %U/%d", format_ip4_address, ip4_prefix, ip4_prefix_len); + pool_put(mm->domains, d); + return -1; + } + + /* Create ip4 adjacency */ + memset(&args4, 0, sizeof(args4)); + args4.table_index_or_table_id = 0; + args4.flags = IP4_ROUTE_FLAG_ADD; + args4.dst_address.as_u32 = ip4_prefix->as_u32; + args4.dst_address_length = ip4_prefix_len; + + args4.adj_index = ~0; + args4.add_adj = &adj; + args4.n_add_adj = 1; + ip4_add_del_route(im4, &args4); + + /* Multiple MAP domains may share same source IPv6 TEP */ + u32 ai = ip6_get_route(im6, 0, 0, ip6_src, ip6_src_len); + if (ai > 0) { + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj6 = ip_get_adjacency(lm6, ai); + if (adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP && + adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP_T) { + clib_warning("BR source address already assigned: %U", format_ip6_address, ip6_src); + pool_put(mm->domains, d); + return -1; + } + /* Shared source */ + p = (uword *)&adj6->rewrite_data[0]; + p[0] = ~0; + + /* Add refcount, so we don't accidentially delete the route underneath someone */ + p[1]++; + } else { + /* Create ip6 adjacency. */ + memset(&args6, 0, sizeof(args6)); + args6.table_index_or_table_id = 0; + args6.flags = IP6_ROUTE_FLAG_ADD; + args6.dst_address.as_u64[0] = ip6_src->as_u64[0]; + args6.dst_address.as_u64[1] = ip6_src->as_u64[1]; + args6.dst_address_length = ip6_src_len; + args6.adj_index = ~0; + args6.add_adj = &adj; + args6.n_add_adj = 1; + ip6_add_del_route(im6, &args6); + } + + /* Validate packet/byte counters */ + map_domain_counter_lock(mm); + int i; + for (i = 0; i < vec_len(mm->simple_domain_counters); i++) { + vlib_validate_simple_counter(&mm->simple_domain_counters[i], *map_domain_index); + vlib_zero_simple_counter(&mm->simple_domain_counters[i], *map_domain_index); + } + for (i = 0; i < vec_len(mm->domain_counters); i++) { + vlib_validate_combined_counter(&mm->domain_counters[i], *map_domain_index); + vlib_zero_combined_counter(&mm->domain_counters[i], *map_domain_index); + } + map_domain_counter_unlock(mm); + + return 0; +} + +/* + * map_delete_domain + */ +int +map_delete_domain (u32 map_domain_index) +{ + map_main_t *mm = &map_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + map_domain_t *d; + ip_adjacency_t adj; + ip4_add_del_route_args_t args4; + ip6_add_del_route_args_t args6; + + if (pool_is_free_index(mm->domains, map_domain_index)) { + clib_warning("MAP domain delete: domain does not exist: %d", map_domain_index); + return -1; + } + + d = pool_elt_at_index(mm->domains, map_domain_index); + + memset(&adj, 0, sizeof(adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = (d->flags & MAP_DOMAIN_TRANSLATION) ? 
IP_LOOKUP_NEXT_MAP_T : IP_LOOKUP_NEXT_MAP; + + /* Delete ip4 adjacency */ + memset(&args4, 0, sizeof(args4)); + args4.table_index_or_table_id = 0; + args4.flags = IP4_ROUTE_FLAG_DEL; + args4.dst_address.as_u32 = d->ip4_prefix.as_u32; + args4.dst_address_length = d->ip4_prefix_len; + args4.adj_index = 0; + args4.add_adj = &adj; + args4.n_add_adj = 0; + ip4_add_del_route(im4, &args4); + + /* Delete ip6 adjacency */ + u32 ai = ip6_get_route(im6, 0, 0, &d->ip6_src, d->ip6_src_len); + if (ai > 0) { + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj6 = ip_get_adjacency(lm6, ai); + + uword *p = (uword *)&adj6->rewrite_data[0]; + /* Delete route when no other domains use this source */ + if (p[1] == 0) { + memset(&args6, 0, sizeof (args6)); + args6.table_index_or_table_id = 0; + args6.flags = IP6_ROUTE_FLAG_DEL; + args6.dst_address.as_u64[0] = d->ip6_src.as_u64[0]; + args6.dst_address.as_u64[1] = d->ip6_src.as_u64[1]; + args6.dst_address_length = d->ip6_src_len; + args6.adj_index = 0; + args6.add_adj = &adj; + args6.n_add_adj = 0; + ip6_add_del_route(im6, &args6); + } + p[1]--; + } + /* Deleting rules */ + if (d->rules) + clib_mem_free(d->rules); + + pool_put(mm->domains, d); + + return 0; +} + +int +map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t *tep, + u8 is_add) +{ + map_domain_t *d; + map_main_t *mm = &map_main; + + if (pool_is_free_index(mm->domains, map_domain_index)) { + clib_warning("MAP rule: domain does not exist: %d", map_domain_index); + return -1; + } + d = pool_elt_at_index(mm->domains, map_domain_index); + + /* Rules are only used in 1:1 independent case */ + if (d->ea_bits_len > 0) + return (-1); + + if (!d->rules) { + u32 l = (0x1 << d->psid_length) * sizeof(ip6_address_t); + d->rules = clib_mem_alloc_aligned(l, CLIB_CACHE_LINE_BYTES); + if (!d->rules) return -1; + memset(d->rules, 0, l); + } + + if (psid >= (0x1 << d->psid_length)) { + clib_warning("MAP rule: PSID outside bounds: %d [%d]", psid, 0x1 << d->psid_length); + return -1; + } + + if (is_add) { + d->rules[psid] = *tep; + } else { + memset(&d->rules[psid], 0, sizeof(ip6_address_t)); + } + return 0; +} + +#ifdef MAP_SKIP_IP6_LOOKUP +static void +map_pre_resolve (ip4_address_t *ip4, ip6_address_t *ip6) +{ + map_main_t *mm = &map_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + + if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0) { + mm->adj6_index = ip6_fib_lookup_with_table(im6, 0, ip6); + clib_warning("FIB lookup results in: %u", mm->adj6_index); + } + if (ip4->as_u32 != 0) { + mm->adj4_index = ip4_fib_lookup_with_table(im4, 0, ip4, 0); + clib_warning("FIB lookup results in: %u", mm->adj4_index); + } +} +#endif + +static clib_error_t * +map_security_check_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + /* Get a line of input. 
*/ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "off")) + mm->sec_check = false; + else if (unformat(line_input, "on")) + mm->sec_check = true; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + return 0; +} + +static clib_error_t * +map_security_check_frag_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "off")) + mm->sec_check_frag = false; + else if (unformat(line_input, "on")) + mm->sec_check_frag = true; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + return 0; +} + +static clib_error_t * +map_add_domain_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4_prefix; + ip6_address_t ip6_prefix; + ip6_address_t ip6_src; + u32 ip6_prefix_len, ip4_prefix_len, map_domain_index, ip6_src_len; + u32 num_m_args = 0; + /* Optional arguments */ + u32 ea_bits_len, psid_offset = 0, psid_length = 0; + u32 mtu = 0; + u8 flags = 0; + ip6_src_len = 128; + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, &ip4_prefix_len)) + num_m_args++; + else if (unformat(line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, &ip6_prefix_len)) + num_m_args++; + else if (unformat(line_input, "ip6-src %U/%d", unformat_ip6_address, &ip6_src, &ip6_src_len)) + num_m_args++; + else if (unformat(line_input, "ip6-src %U", unformat_ip6_address, &ip6_src)) + num_m_args++; + else if (unformat(line_input, "ea-bits-len %d", &ea_bits_len)) + num_m_args++; + else if (unformat(line_input, "psid-offset %d", &psid_offset)) + num_m_args++; + else if (unformat(line_input, "psid-len %d", &psid_length)) + num_m_args++; + else if (unformat(line_input, "mtu %d", &mtu)) + num_m_args++; + else if (unformat(line_input, "map-t")) + flags |= MAP_DOMAIN_TRANSLATION; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args < 3) + return clib_error_return(0, "mandatory argument(s) missing"); + + map_create_domain(&ip4_prefix, ip4_prefix_len, + &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len, + ea_bits_len, psid_offset, psid_length, &map_domain_index, + mtu, flags); + + return 0; +} + +static clib_error_t * +map_del_domain_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 num_m_args = 0; + u32 map_domain_index; + + /* Get a line of input. */ + if (! 
unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "index %d", &map_domain_index)) + num_m_args++; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args != 1) + return clib_error_return(0, "mandatory argument(s) missing"); + + map_delete_domain(map_domain_index); + + return 0; +} + +static clib_error_t * +map_add_rule_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip6_address_t tep; + u32 num_m_args = 0; + u32 psid, map_domain_index; + + /* Get a line of input. */ + if (! unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "index %d", &map_domain_index)) + num_m_args++; + else if (unformat(line_input, "psid %d", &psid)) + num_m_args++; + else if (unformat(line_input, "ip6-dst %U", unformat_ip6_address, &tep)) + num_m_args++; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args != 3) + return clib_error_return(0, "mandatory argument(s) missing"); + + if (map_add_del_psid(map_domain_index, psid, &tep, 1) != 0) { + return clib_error_return(0, "Failing to add Mapping Rule"); + } + return 0; +} + +#if MAP_SKIP_IP6_LOOKUP +static clib_error_t * +map_pre_resolve_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4nh; + ip6_address_t ip6nh; + map_main_t *mm = &map_main; + + memset(&ip4nh, 0, sizeof(ip4nh)); + memset(&ip6nh, 0, sizeof(ip6nh)); + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh)) + mm->preresolve_ip4 = ip4nh; + else if (unformat(line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) + mm->preresolve_ip6 = ip6nh; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + map_pre_resolve(&ip4nh, &ip6nh); + + return 0; +} +#endif + +static clib_error_t * +map_icmp_relay_source_address_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t icmp_src_address; + map_main_t *mm = &map_main; + + memset(&icmp_src_address, 0, sizeof(icmp_src_address)); + + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "%U", unformat_ip4_address, &icmp_src_address)) + mm->icmp_src_address = icmp_src_address; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + return 0; +} + +static clib_error_t * +map_traffic_class_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + u32 tc = 0; + + mm->tc_copy = false; + + /* Get a line of input. 
*/ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "copy")) + mm->tc_copy = true; + else if (unformat(line_input, "%x", &tc)) + mm->tc = tc & 0xff; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + return 0; +} + +static u8 * +format_map_domain (u8 *s, va_list *args) +{ + map_domain_t *d = va_arg(*args, map_domain_t *); + bool counters = va_arg(*args, int); + map_main_t *mm = &map_main; + ip6_address_t ip6_prefix; + + if (d->rules) + memset(&ip6_prefix, 0, sizeof(ip6_prefix)); + else + ip6_prefix = d->ip6_prefix; + + s = format(s, + "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d psid-offset %d psid-len %d mtu %d %s", + d - mm->domains, + format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len, + format_ip6_address, &ip6_prefix, d->ip6_prefix_len, + format_ip6_address, &d->ip6_src, d->ip6_src_len, + d->ea_bits_len, d->psid_offset, d->psid_length, d->mtu, + (d->flags & MAP_DOMAIN_TRANSLATION) ? "map-t" : ""); + + if (counters) { + map_domain_counter_lock(mm); + vlib_counter_t v; + vlib_get_combined_counter(&mm->domain_counters[MAP_DOMAIN_COUNTER_TX], d - mm->domains, &v); + s = format(s, " TX: %d/%d", v.packets, v.bytes); + vlib_get_combined_counter(&mm->domain_counters[MAP_DOMAIN_COUNTER_RX], d - mm->domains, &v); + s = format(s, " RX: %d/%d", v.packets, v.bytes); + map_domain_counter_unlock(mm); + } + + if (d->rules) { + int i; + ip6_address_t dst; + for (i = 0; i < (0x1 << d->psid_length); i++) { + dst = d->rules[i]; + if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0 ) + continue; + s = format(s, + " rule psid: %d ip6-dst %U\n", i, format_ip6_address, &dst); + } + } + return s; +} + +static u8 * +format_map_ip4_reass (u8 *s, va_list *args) +{ + map_main_t *mm = &map_main; + map_ip4_reass_t *r = va_arg(*args, map_ip4_reass_t *); + map_ip4_reass_key_t *k = &r->key; + f64 now = vlib_time_now(mm->vlib_main); + f64 lifetime = (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000); + f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1; + s = format(s, + "ip4-reass src=%U dst=%U protocol=%d identifier=%d port=%d lifetime=%.3lf\n", + format_ip4_address, &k->src.as_u8, format_ip4_address, &k->dst.as_u8, + k->protocol, clib_net_to_host_u16(k->fragment_id), (r->port >= 0)?clib_net_to_host_u16(r->port):-1, dt); + return s; +} + +static u8 * +format_map_ip6_reass (u8 *s, va_list *args) +{ + map_main_t *mm = &map_main; + map_ip6_reass_t *r = va_arg(*args, map_ip6_reass_t *); + map_ip6_reass_key_t *k = &r->key; + f64 now = vlib_time_now(mm->vlib_main); + f64 lifetime = (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000); + f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1; + s = format(s, + "ip6-reass src=%U dst=%U protocol=%d identifier=%d lifetime=%.3lf\n", + format_ip6_address, &k->src.as_u8, format_ip6_address, &k->dst.as_u8, + k->protocol, clib_net_to_host_u32(k->fragment_id), dt); + return s; +} + +static clib_error_t * +show_map_domain_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + map_domain_t *d; + bool counters = false; + u32 map_domain_index = ~0; + + /* Get a line of input. 
*/ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "counters")) + counters = true; + else if (unformat(line_input, "index %d", &map_domain_index)) + ; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (pool_elts(mm->domains) == 0) + vlib_cli_output(vm, "No MAP domains are configured..."); + + if (map_domain_index == ~0) { + pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);})); + } else { + if (pool_is_free_index(mm->domains, map_domain_index)) { + return clib_error_return(0, "MAP domain does not exists %d", map_domain_index); + } + + d = pool_elt_at_index(mm->domains, map_domain_index); + vlib_cli_output(vm, "%U", format_map_domain, d, counters); + } + + return 0; +} + +static clib_error_t * +show_map_fragments_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + map_main_t *mm = &map_main; + map_ip4_reass_t *f4; + map_ip6_reass_t *f6; + + pool_foreach(f4, mm->ip4_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip4_reass, f4);})); + pool_foreach(f6, mm->ip6_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip6_reass, f6);})); + return (0); +} + +u64 +map_error_counter_get (u32 node_index, map_error_t map_error) +{ + vlib_main_t *vm = vlib_get_main(); + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, node_index); + vlib_error_main_t *em = &vm->error_main; + vlib_error_t e = error_node->errors[map_error]; + vlib_node_t *n = vlib_get_node(vm, node_index); + u32 ci; + + ci = vlib_error_get_code(e); + ASSERT (ci < n->n_errors); + ci += n->error_heap_index; + + return (em->counters[ci]); +} + +static clib_error_t * +show_map_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + map_main_t *mm = &map_main; + map_domain_t *d; + int domains = 0, rules = 0, domaincount = 0, rulecount = 0; + if (pool_elts (mm->domains) == 0) + vlib_cli_output(vm, "No MAP domains are configured..."); + + pool_foreach(d, mm->domains, ({ + if (d->rules) { + rulecount+= 0x1 << d->psid_length; + rules += sizeof(ip6_address_t) * 0x1 << d->psid_length; + } + domains += sizeof(*d); + domaincount++; + })); + + vlib_cli_output(vm, "MAP domains structure: %d\n", sizeof (map_domain_t)); + vlib_cli_output(vm, "MAP domains: %d (%d bytes)\n", domaincount, domains); + vlib_cli_output(vm, "MAP rules: %d (%d bytes)\n", rulecount, rules); + vlib_cli_output(vm, "Total: %d bytes)\n", rules + domains); + +#if MAP_SKIP_IP6_LOOKUP + vlib_cli_output(vm, "MAP pre-resolve: IP6 next-hop: %U (%u), IP4 next-hop: %U (%u)\n", + format_ip6_address, &mm->preresolve_ip6, mm->adj6_index, + format_ip4_address, &mm->preresolve_ip4, mm->adj4_index); +#endif + + if (mm->tc_copy) + vlib_cli_output(vm, "MAP traffic-class: copy"); + else + vlib_cli_output(vm, "MAP traffic-class: %x", mm->tc); + + vlib_cli_output(vm, "MAP IPv6 inbound security check: %s Fragments: %s", mm->sec_check ? "enabled" : "disabled", + mm->sec_check_frag ? 
"enabled" : "disabled"); + + + /* + * Counters + */ + vlib_combined_counter_main_t *cm = mm->domain_counters; + u64 total_pkts[MAP_N_DOMAIN_COUNTER]; + u64 total_bytes[MAP_N_DOMAIN_COUNTER]; + int which, i; + vlib_counter_t v; + + memset (total_pkts, 0, sizeof (total_pkts)); + memset (total_bytes, 0, sizeof (total_bytes)); + + map_domain_counter_lock (mm); + vec_foreach (cm, mm->domain_counters) { + which = cm - mm->domain_counters; + + for (i = 0; i < vec_len (cm->maxi); i++) { + vlib_get_combined_counter (cm, i, &v); + total_pkts[which] += v.packets; + total_bytes[which] += v.bytes; + } + } + map_domain_counter_unlock (mm); + + vlib_cli_output(vm, "Encapsulated packets: %d bytes: %d\n", total_pkts[MAP_DOMAIN_COUNTER_TX], + total_bytes[MAP_DOMAIN_COUNTER_TX]); + vlib_cli_output(vm, "Decapsulated packets: %d bytes: %d\n", total_pkts[MAP_DOMAIN_COUNTER_RX], + total_bytes[MAP_DOMAIN_COUNTER_RX]); + + vlib_cli_output(vm, "ICMP relayed packets: %d\n", vlib_get_simple_counter(&mm->icmp_relayed, 0)); + + return 0; +} + +static clib_error_t * +map_params_reass_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 lifetime = ~0; + f64 ht_ratio = (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1); + u32 pool_size = ~0; + u64 buffers = ~(0ull); + u8 ip4 = 0, ip6 = 0; + + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (!unformat(line_input, "lifetime %u", &lifetime) && + !unformat(line_input, "ht-ratio %lf", &ht_ratio) && + !unformat(line_input, "pool-size %u", &pool_size) && + !unformat(line_input, "buffers %llu", &buffers) && + !((unformat(line_input, "ip4")) && (ip4 = 1)) && + !((unformat(line_input, "ip6")) && (ip6 = 1))) { + unformat_free(line_input); + return clib_error_return(0, "invalid input"); + } + } + unformat_free(line_input); + + if (!ip4 && !ip6) + return clib_error_return(0, "must specify ip4 and/or ip6"); + + if (ip4) { + if (pool_size != ~0 && pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) + return clib_error_return(0, "invalid ip4-reass pool-size ( > %d)", MAP_IP4_REASS_CONF_POOL_SIZE_MAX); + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1) && ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) + return clib_error_return(0, "invalid ip4-reass ht-ratio ( > %d)", MAP_IP4_REASS_CONF_HT_RATIO_MAX); + if (lifetime != ~0 && lifetime > MAP_IP4_REASS_CONF_LIFETIME_MAX) + return clib_error_return(0, "invalid ip4-reass lifetime ( > %d)", MAP_IP4_REASS_CONF_LIFETIME_MAX); + if (buffers != ~(0ull) && buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX) + return clib_error_return(0, "invalid ip4-reass buffers ( > %ld)", MAP_IP4_REASS_CONF_BUFFERS_MAX); + } + + if (ip6) { + if (pool_size != ~0 && pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) + return clib_error_return(0, "invalid ip6-reass pool-size ( > %d)", MAP_IP6_REASS_CONF_POOL_SIZE_MAX); + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1) && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) + return clib_error_return(0, "invalid ip6-reass ht-log2len ( > %d)", MAP_IP6_REASS_CONF_HT_RATIO_MAX); + if (lifetime != ~0 && lifetime > MAP_IP6_REASS_CONF_LIFETIME_MAX) + return clib_error_return(0, "invalid ip6-reass lifetime ( > %d)", MAP_IP6_REASS_CONF_LIFETIME_MAX); + if (buffers != ~(0ull) && buffers > MAP_IP6_REASS_CONF_BUFFERS_MAX) + return clib_error_return(0, "invalid ip6-reass buffers ( > %ld)", MAP_IP6_REASS_CONF_BUFFERS_MAX); + } + + if (ip4) { + u32 reass = 0, packets = 0; + if 
(pool_size != ~0) { + if (map_ip4_reass_conf_pool_size(pool_size, &reass, &packets)) { + vlib_cli_output(vm, "Could not set ip4-reass pool-size"); + } else { + vlib_cli_output(vm, "Setting ip4-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets); + } + } + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1)) { + if (map_ip4_reass_conf_ht_ratio(ht_ratio, &reass, &packets)) { + vlib_cli_output(vm, "Could not set ip4-reass ht-log2len"); + } else { + vlib_cli_output(vm, "Setting ip4-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets); + } + } + if (lifetime != ~0) { + if (map_ip4_reass_conf_lifetime(lifetime)) + vlib_cli_output(vm, "Could not set ip4-reass lifetime"); + else + vlib_cli_output(vm, "Setting ip4-reass lifetime"); + } + if (buffers != ~(0ull)) { + if (map_ip4_reass_conf_buffers(buffers)) + vlib_cli_output(vm, "Could not set ip4-reass buffers"); + else + vlib_cli_output(vm, "Setting ip4-reass buffers"); + } + + if (map_main.ip4_reass_conf_buffers > + map_main.ip4_reass_conf_pool_size * MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) { + vlib_cli_output(vm, "Note: 'ip4-reass buffers' > pool-size * max-fragments-per-reassembly."); + } + } + + if (ip6) { + u32 reass = 0, packets = 0; + if (pool_size != ~0) { + if (map_ip6_reass_conf_pool_size(pool_size, &reass, &packets)) { + vlib_cli_output(vm, "Could not set ip6-reass pool-size"); + } else { + vlib_cli_output(vm, "Setting ip6-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets); + } + } + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1)) { + if (map_ip6_reass_conf_ht_ratio(ht_ratio, &reass, &packets)) { + vlib_cli_output(vm, "Could not set ip6-reass ht-log2len"); + } else { + vlib_cli_output(vm, "Setting ip6-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets); + } + } + if (lifetime != ~0) { + if (map_ip6_reass_conf_lifetime(lifetime)) + vlib_cli_output(vm, "Could not set ip6-reass lifetime"); + else + vlib_cli_output(vm, "Setting ip6-reass lifetime"); + } + if (buffers != ~(0ull)) { + if (map_ip6_reass_conf_buffers(buffers)) + vlib_cli_output(vm, "Could not set ip6-reass buffers"); + else + vlib_cli_output(vm, "Setting ip6-reass buffers"); + } + + if (map_main.ip6_reass_conf_buffers > + map_main.ip6_reass_conf_pool_size * MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) { + vlib_cli_output(vm, "Note: 'ip6-reass buffers' > pool-size * max-fragments-per-reassembly."); + } + } + + return 0; +} + + +/* + * packet trace format function + */ +u8 * +format_map_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + map_trace_t *t = va_arg (*args, map_trace_t *); + u32 map_domain_index = t->map_domain_index; + u16 port = t->port; + + s = format(s, "MAP domain index: %d L4 port: %u", map_domain_index, clib_net_to_host_u16(port)); + + return s; +} + +static_always_inline map_ip4_reass_t * +map_ip4_reass_lookup(map_ip4_reass_key_t *k, u32 bucket, f64 now) +{ + map_main_t *mm = &map_main; + u32 ri = mm->ip4_reass_hash_table[bucket]; + while(ri != MAP_REASS_INDEX_NONE) { + map_ip4_reass_t * r = pool_elt_at_index(mm->ip4_reass_pool, ri); + if (r->key.as_u64[0] == k->as_u64[0] && + r->key.as_u64[1] == k->as_u64[1] && + now < r->ts + (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000)) { + return r; + } + ri = r->bucket_next; + } + return NULL; +} + +#define map_ip4_reass_pool_index(r) (r - map_main.ip4_reass_pool) + +void 
+map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop) +{ + map_main_t *mm = &map_main; + map_ip4_reass_get_fragments(r, pi_to_drop); + + // Unlink in hash bucket + map_ip4_reass_t *r2 = NULL; + u32 r2i = mm->ip4_reass_hash_table[r->bucket]; + while (r2i != map_ip4_reass_pool_index(r)) { + ASSERT(r2i != MAP_REASS_INDEX_NONE); + r2 = pool_elt_at_index(mm->ip4_reass_pool, r2i); + r2i = r2->bucket_next; + } + if (r2) { + r2->bucket_next = r->bucket_next; + } else { + mm->ip4_reass_hash_table[r->bucket] = r->bucket_next; + } + + // Unlink in list + if (r->fifo_next == map_ip4_reass_pool_index(r)) { + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + } else { + if(mm->ip4_reass_fifo_last == map_ip4_reass_pool_index(r)) + mm->ip4_reass_fifo_last = r->fifo_prev; + pool_elt_at_index(mm->ip4_reass_pool, r->fifo_prev)->fifo_next = r->fifo_next; + pool_elt_at_index(mm->ip4_reass_pool, r->fifo_next)->fifo_prev = r->fifo_prev; + } + + pool_put(mm->ip4_reass_pool, r); + mm->ip4_reass_allocated--; +} + +map_ip4_reass_t * +map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id, + u8 protocol, u32 **pi_to_drop) +{ + map_ip4_reass_t * r; + map_main_t *mm = &map_main; + map_ip4_reass_key_t k = {.src.data_u32 = src, + .dst.data_u32 = dst, + .fragment_id = fragment_id, + .protocol = protocol }; + + u32 h = 0; + h = crc_u32(k.as_u32[0], h); + h = crc_u32(k.as_u32[1], h); + h = crc_u32(k.as_u32[2], h); + h = crc_u32(k.as_u32[3], h); + h = h >> (32 - mm->ip4_reass_ht_log2len); + + f64 now = vlib_time_now(mm->vlib_main); + + //Cache garbage collection + while (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) { + map_ip4_reass_t *last = pool_elt_at_index(mm->ip4_reass_pool, mm->ip4_reass_fifo_last); + if (last->ts + (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000) < now) + map_ip4_reass_free(last, pi_to_drop); + else + break; + } + + if ((r = map_ip4_reass_lookup(&k, h, now))) + return r; + + if (mm->ip4_reass_allocated >= mm->ip4_reass_conf_pool_size) + return NULL; + + pool_get(mm->ip4_reass_pool, r); + mm->ip4_reass_allocated++; + int i; + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + r->fragments[i] = ~0; + + u32 ri = map_ip4_reass_pool_index(r); + + //Link in new bucket + r->bucket = h; + r->bucket_next = mm->ip4_reass_hash_table[h]; + mm->ip4_reass_hash_table[h] = ri; + + //Link in fifo + if(mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) { + r->fifo_next = pool_elt_at_index(mm->ip4_reass_pool, mm->ip4_reass_fifo_last)->fifo_next; + r->fifo_prev = mm->ip4_reass_fifo_last; + pool_elt_at_index(mm->ip4_reass_pool, r->fifo_prev)->fifo_next = ri; + pool_elt_at_index(mm->ip4_reass_pool, r->fifo_next)->fifo_prev = ri; + } else { + r->fifo_next = r->fifo_prev = ri; + mm->ip4_reass_fifo_last = ri; + } + + //Set other fields + r->ts = now; + r->key = k; + r->port = -1; +#ifdef MAP_IP4_REASS_COUNT_BYTES + r->expected_total = 0xffff; + r->forwarded = 0; +#endif + + return r; +} + +int +map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi) +{ + if (map_main.ip4_reass_buffered_counter >= map_main.ip4_reass_conf_buffers) + return -1; + + int i; + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if(r->fragments[i] == ~0) { + r->fragments[i] = pi; + map_main.ip4_reass_buffered_counter++; + return 0; + } + return -1; +} + +static_always_inline map_ip6_reass_t * +map_ip6_reass_lookup(map_ip6_reass_key_t *k, u32 bucket, f64 now) +{ + map_main_t *mm = &map_main; + u32 ri = mm->ip6_reass_hash_table[bucket]; + while(ri != MAP_REASS_INDEX_NONE) { + map_ip6_reass_t * r = 
pool_elt_at_index(mm->ip6_reass_pool, ri); + if(now < r->ts + (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000) && + r->key.as_u64[0] == k->as_u64[0] && + r->key.as_u64[1] == k->as_u64[1] && + r->key.as_u64[2] == k->as_u64[2] && + r->key.as_u64[3] == k->as_u64[3] && + r->key.as_u64[4] == k->as_u64[4]) + return r; + ri = r->bucket_next; + } + return NULL; +} + +#define map_ip6_reass_pool_index(r) (r - map_main.ip6_reass_pool) + +void +map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop) +{ + map_main_t *mm = &map_main; + int i; + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if(r->fragments[i].pi != ~0) { + vec_add1(*pi_to_drop, r->fragments[i].pi); + r->fragments[i].pi = ~0; + map_main.ip6_reass_buffered_counter--; + } + + // Unlink in hash bucket + map_ip6_reass_t *r2 = NULL; + u32 r2i = mm->ip6_reass_hash_table[r->bucket]; + while (r2i != map_ip6_reass_pool_index(r)) { + ASSERT(r2i != MAP_REASS_INDEX_NONE); + r2 = pool_elt_at_index(mm->ip6_reass_pool, r2i); + r2i = r2->bucket_next; + } + if (r2) { + r2->bucket_next = r->bucket_next; + } else { + mm->ip6_reass_hash_table[r->bucket] = r->bucket_next; + } + + // Unlink in list + if (r->fifo_next == map_ip6_reass_pool_index(r)) { + //Single element in the list, list is now empty + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + } else { + if (mm->ip6_reass_fifo_last == map_ip6_reass_pool_index(r)) //First element + mm->ip6_reass_fifo_last = r->fifo_prev; + pool_elt_at_index(mm->ip6_reass_pool, r->fifo_prev)->fifo_next = r->fifo_next; + pool_elt_at_index(mm->ip6_reass_pool, r->fifo_next)->fifo_prev = r->fifo_prev; + } + + // Free from pool if necessary + pool_put(mm->ip6_reass_pool, r); + mm->ip6_reass_allocated--; +} + +map_ip6_reass_t * +map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id, + u8 protocol, u32 **pi_to_drop) +{ + map_ip6_reass_t * r; + map_main_t *mm = &map_main; + map_ip6_reass_key_t k = { + .src = *src, + .dst = *dst, + .fragment_id = fragment_id, + .protocol = protocol }; + + u32 h = 0; + int i; + for (i=0; i<10; i++) + h = crc_u32(k.as_u32[i], h); + h = h >> (32 - mm->ip6_reass_ht_log2len); + + f64 now = vlib_time_now(mm->vlib_main); + + //Cache garbage collection + while (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) { + map_ip6_reass_t *last = pool_elt_at_index(mm->ip6_reass_pool, mm->ip6_reass_fifo_last); + if (last->ts + (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000) < now) + map_ip6_reass_free(last, pi_to_drop); + else + break; + } + + if ((r = map_ip6_reass_lookup(&k, h, now))) + return r; + + if (mm->ip6_reass_allocated >= mm->ip6_reass_conf_pool_size) + return NULL; + + pool_get(mm->ip6_reass_pool, r); + mm->ip6_reass_allocated++; + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) { + r->fragments[i].pi = ~0; + r->fragments[i].next_data_len = 0; + r->fragments[i].next_data_offset = 0; + } + + u32 ri = map_ip6_reass_pool_index(r); + + //Link in new bucket + r->bucket = h; + r->bucket_next = mm->ip6_reass_hash_table[h]; + mm->ip6_reass_hash_table[h] = ri; + + //Link in fifo + if(mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) { + r->fifo_next = pool_elt_at_index(mm->ip6_reass_pool, mm->ip6_reass_fifo_last)->fifo_next; + r->fifo_prev = mm->ip6_reass_fifo_last; + pool_elt_at_index(mm->ip6_reass_pool, r->fifo_prev)->fifo_next = ri; + pool_elt_at_index(mm->ip6_reass_pool, r->fifo_next)->fifo_prev = ri; + } else { + r->fifo_next = r->fifo_prev = ri; + mm->ip6_reass_fifo_last = ri; + } + + //Set other fields + r->ts = now; + r->key = k; + 
r->ip4_header.ip_version_and_header_length = 0; +#ifdef MAP_IP6_REASS_COUNT_BYTES + r->expected_total = 0xffff; + r->forwarded = 0; +#endif + return r; +} + +int +map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi, + u16 data_offset, u16 next_data_offset, + u8 *data_start, u16 data_len) +{ + map_ip6_fragment_t *f = NULL, *prev_f = NULL; + u16 copied_len = (data_len > 20) ? 20 : data_len; + + if (map_main.ip6_reass_buffered_counter >= map_main.ip6_reass_conf_buffers) + return -1; + + //Lookup for fragments for the current buffer + //and the one before that + int i; + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) { + if (data_offset && r->fragments[i].next_data_offset == data_offset) { + prev_f = &r->fragments[i]; // This is buffer for previous packet + } else if (r->fragments[i].next_data_offset == next_data_offset) { + f = &r->fragments[i]; // This is a buffer for the current packet + } else if (r->fragments[i].next_data_offset == 0) { //Available + if (f == NULL) + f = &r->fragments[i]; + else if (prev_f == NULL) + prev_f = &r->fragments[i]; + } + } + + if (!f || f->pi != ~0) + return -1; + + if (data_offset) { + if (!prev_f) + return -1; + + memcpy(prev_f->next_data, data_start, copied_len); + prev_f->next_data_len = copied_len; + prev_f->next_data_offset = data_offset; + } else { + if (((ip4_header_t *)data_start)->ip_version_and_header_length != 0x45) + return -1; + + if (r->ip4_header.ip_version_and_header_length == 0) + memcpy(&r->ip4_header, data_start, sizeof(ip4_header_t)); + } + + if(data_len > 20) { + f->next_data_offset = next_data_offset; + f->pi = pi; + map_main.ip6_reass_buffered_counter++; + } + return 0; +} + +void map_ip4_reass_reinit(u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + int i; + + if(dropped_packets) + *dropped_packets = mm->ip4_reass_buffered_counter; + if(trashed_reass) + *trashed_reass = mm->ip4_reass_allocated; + if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) { + u16 ri = mm->ip4_reass_fifo_last; + do { + map_ip4_reass_t *r = pool_elt_at_index(mm->ip4_reass_pool, ri); + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i] != ~0) + map_ip4_drop_pi(r->fragments[i]); + + ri = r->fifo_next; + pool_put(mm->ip4_reass_pool, r); + } while (ri != mm->ip4_reass_fifo_last); + } + + vec_free(mm->ip4_reass_hash_table); + vec_resize(mm->ip4_reass_hash_table, 1 << mm->ip4_reass_ht_log2len); + for (i=0; i<(1 << mm->ip4_reass_ht_log2len); i++) + mm->ip4_reass_hash_table[i] = MAP_REASS_INDEX_NONE; + pool_free(mm->ip4_reass_pool); + pool_alloc(mm->ip4_reass_pool, mm->ip4_reass_conf_pool_size); + + mm->ip4_reass_allocated = 0; + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + mm->ip4_reass_buffered_counter = 0; +} + +u8 map_get_ht_log2len(f32 ht_ratio, u16 pool_size) +{ + u32 desired_size = (u32)(pool_size * ht_ratio); + u8 i; + for (i=1; i<31; i++) + if ((1 << i) >= desired_size) + return i; + return 4; +} + +int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) + return -1; + + map_ip4_reass_lock(); + mm->ip4_reass_conf_ht_ratio = ht_ratio; + mm->ip4_reass_ht_log2len = map_get_ht_log2len(ht_ratio, mm->ip4_reass_conf_pool_size); + map_ip4_reass_reinit(trashed_reass, dropped_packets); + map_ip4_reass_unlock(); + return 0; +} + +int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if (pool_size > 
MAP_IP4_REASS_CONF_POOL_SIZE_MAX) + return -1; + + map_ip4_reass_lock(); + mm->ip4_reass_conf_pool_size = pool_size; + map_ip4_reass_reinit(trashed_reass, dropped_packets); + map_ip4_reass_unlock(); + return 0; +} + +int map_ip4_reass_conf_lifetime(u16 lifetime_ms) +{ + map_main.ip4_reass_conf_lifetime_ms = lifetime_ms; + return 0; +} + +int map_ip4_reass_conf_buffers(u32 buffers) +{ + map_main.ip4_reass_conf_buffers = buffers; + return 0; +} + +void map_ip6_reass_reinit(u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if(dropped_packets) + *dropped_packets = mm->ip6_reass_buffered_counter; + if(trashed_reass) + *trashed_reass = mm->ip6_reass_allocated; + int i; + if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) { + u16 ri = mm->ip6_reass_fifo_last; + do { + map_ip6_reass_t *r = pool_elt_at_index(mm->ip6_reass_pool, ri); + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i].pi != ~0) + map_ip6_drop_pi(r->fragments[i].pi); + + ri = r->fifo_next; + pool_put(mm->ip6_reass_pool, r); + } while (ri != mm->ip6_reass_fifo_last); + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + } + + vec_free(mm->ip6_reass_hash_table); + vec_resize(mm->ip6_reass_hash_table, 1 << mm->ip6_reass_ht_log2len); + for(i=0; i<(1 << mm->ip6_reass_ht_log2len); i++) + mm->ip6_reass_hash_table[i] = MAP_REASS_INDEX_NONE; + pool_free(mm->ip6_reass_pool); + pool_alloc(mm->ip6_reass_pool, mm->ip4_reass_conf_pool_size); + + mm->ip6_reass_allocated = 0; + mm->ip6_reass_buffered_counter = 0; +} + +int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if (ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) + return -1; + + map_ip6_reass_lock(); + mm->ip6_reass_conf_ht_ratio = ht_ratio; + mm->ip6_reass_ht_log2len = map_get_ht_log2len(ht_ratio, mm->ip6_reass_conf_pool_size); + map_ip6_reass_reinit(trashed_reass, dropped_packets); + map_ip6_reass_unlock(); + return 0; +} + +int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if (pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) + return -1; + + map_ip6_reass_lock(); + mm->ip6_reass_conf_pool_size = pool_size; + map_ip6_reass_reinit(trashed_reass, dropped_packets); + map_ip6_reass_unlock(); + return 0; +} + +int map_ip6_reass_conf_lifetime(u16 lifetime_ms) +{ + map_main.ip6_reass_conf_lifetime_ms = lifetime_ms; + return 0; +} + +int map_ip6_reass_conf_buffers(u32 buffers) +{ + map_main.ip6_reass_conf_buffers = buffers; + return 0; +} + +VLIB_CLI_COMMAND(map_ip4_reass_lifetime_command, static) = { + .path = "map params reassembly", + .short_help = "[ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]", + .function = map_params_reass_command_fn, +}; + +VLIB_CLI_COMMAND(map_traffic_class_command, static) = { + .path = "map params traffic-class", + .short_help = + "traffic-class {0x0-0xff | copy}", + .function = map_traffic_class_command_fn, +}; + +VLIB_CLI_COMMAND(map_pre_resolve_command, static) = { + .path = "map params pre-resolve", + .short_help = + "pre-resolve {ip4-nh <address>} | {ip6-nh <address>}", + .function = map_pre_resolve_command_fn, +}; + +VLIB_CLI_COMMAND(map_security_check_command, static) = { + .path = "map params security-check", + .short_help = + "security-check on|off", + .function = map_security_check_command_fn, +}; + +VLIB_CLI_COMMAND(map_icmp_relay_source_address_command, static) = { + .path = "map params 
icmp-source-address", + .short_help = + "icmp-source-address <ip4-address>", + .function = map_icmp_relay_source_address_command_fn, +}; + +VLIB_CLI_COMMAND(map_security_check_frag_command, static) = { + .path = "map params security-check fragments", + .short_help = + "fragments on|off", + .function = map_security_check_frag_command_fn, +}; + +VLIB_CLI_COMMAND(map_add_domain_command, static) = { + .path = "map add domain", + .short_help = + "map add domain ip4-pfx <ip4-pfx> ip6-pfx <ip6-pfx> ip6-src <ip6-pfx> " + "ea-bits-len <n> psid-offset <n> psid-len <n> [map-t] [mtu <mtu>]", + .function = map_add_domain_command_fn, +}; + +VLIB_CLI_COMMAND(map_add_rule_command, static) = { + .path = "map add rule", + .short_help = + "map add rule index <domain> psid <psid> ip6-dst <ip6-addr>", + .function = map_add_rule_command_fn, +}; + +VLIB_CLI_COMMAND(map_del_command, static) = { + .path = "map del domain", + .short_help = + "map del domain index <domain>", + .function = map_del_domain_command_fn, +}; + +VLIB_CLI_COMMAND(show_map_domain_command, static) = { + .path = "show map domain", + .function = show_map_domain_command_fn, +}; + +VLIB_CLI_COMMAND(show_map_stats_command, static) = { + .path = "show map stats", + .function = show_map_stats_command_fn, +}; + +VLIB_CLI_COMMAND(show_map_fragments_command, static) = { + .path = "show map fragments", + .function = show_map_fragments_command_fn, +}; + +/* + * map_init + */ +clib_error_t *map_init (vlib_main_t *vm) +{ + map_main_t *mm = &map_main; + mm->vnet_main = vnet_get_main(); + mm->vlib_main = vm; + +#ifdef MAP_SKIP_IP6_LOOKUP + memset(&mm->preresolve_ip4, 0, sizeof(mm->preresolve_ip4)); + memset(&mm->preresolve_ip6, 0, sizeof(mm->preresolve_ip6)); + mm->adj4_index = 0; + mm->adj6_index = 0; +#endif + + /* traffic class */ + mm->tc = 0; + mm->tc_copy = true; + + /* Inbound security check */ + mm->sec_check = true; + mm->sec_check_frag = false; + + vec_validate(mm->domain_counters, MAP_N_DOMAIN_COUNTER - 1); + mm->domain_counters[MAP_DOMAIN_COUNTER_RX].name = "rx"; + mm->domain_counters[MAP_DOMAIN_COUNTER_TX].name = "tx"; + + vlib_validate_simple_counter(&mm->icmp_relayed, 0); + vlib_zero_simple_counter(&mm->icmp_relayed, 0); + + /* IP4 virtual reassembly */ + mm->ip4_reass_hash_table = 0; + mm->ip4_reass_pool = 0; + mm->ip4_reass_lock = clib_mem_alloc_aligned(CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT; + mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT; + mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT; + mm->ip4_reass_conf_buffers = MAP_IP4_REASS_BUFFERS_DEFAULT; + mm->ip4_reass_ht_log2len = map_get_ht_log2len(mm->ip4_reass_conf_ht_ratio, mm->ip4_reass_conf_pool_size); + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + map_ip4_reass_reinit(NULL, NULL); + + /* IP6 virtual reassembly */ + mm->ip6_reass_hash_table = 0; + mm->ip6_reass_pool = 0; + mm->ip6_reass_lock = clib_mem_alloc_aligned(CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + mm->ip6_reass_conf_ht_ratio = MAP_IP6_REASS_HT_RATIO_DEFAULT; + mm->ip6_reass_conf_lifetime_ms = MAP_IP6_REASS_LIFETIME_DEFAULT; + mm->ip6_reass_conf_pool_size = MAP_IP6_REASS_POOL_SIZE_DEFAULT; + mm->ip6_reass_conf_buffers = MAP_IP6_REASS_BUFFERS_DEFAULT; + mm->ip6_reass_ht_log2len = map_get_ht_log2len(mm->ip6_reass_conf_ht_ratio, mm->ip6_reass_conf_pool_size); + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + map_ip6_reass_reinit(NULL, NULL); + + return 0; +} + +VLIB_INIT_FUNCTION(map_init); diff --git 
a/vnet/vnet/map/map.h b/vnet/vnet/map/map.h new file mode 100644 index 00000000000..ae58cdb9120 --- /dev/null +++ b/vnet/vnet/map/map.h @@ -0,0 +1,556 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <stdbool.h> +#include <vppinfra/error.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vlib/vlib.h> + +#define MAP_SKIP_IP6_LOOKUP 1 + +typedef enum { + MAP_SENDER, + MAP_RECEIVER +} map_dir_e; + +int map_create_domain(ip4_address_t *ip4_prefix, u8 ip4_prefix_len, + ip6_address_t *ip6_prefix, u8 ip6_prefix_len, + ip6_address_t *ip6_src, u8 ip6_src_len, + u8 ea_bits_len, u8 psid_offset, u8 psid_length, + u32 *map_domain_index, u16 mtu, u8 flags); +int map_delete_domain(u32 map_domain_index); +int map_add_del_psid(u32 map_domain_index, u16 psid, ip6_address_t *tep, u8 is_add); +u8 *format_map_trace(u8 *s, va_list *args); +i32 ip4_get_port(ip4_header_t *ip, map_dir_e dir, u16 buffer_len); +i32 ip6_get_port(ip6_header_t *ip6, map_dir_e dir, u16 buffer_len); +u16 ip4_map_get_port (ip4_header_t *ip, map_dir_e dir); + +typedef enum __attribute__ ((__packed__)) { + MAP_DOMAIN_PREFIX = 1 << 0, + MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T +} map_domain_flags_e; + +/** + * IP4 reassembly logic: + * One virtually reassembled flow requires a map_ip4_reass_t structure in order + * to keep the first-fragment port number and, optionally, cache out of sequence + * packets. + * There are up to MAP_IP4_REASS_MAX_REASSEMBLY such structures. + * When in use, those structures are stored in a hash table of MAP_IP4_REASS_BUCKETS buckets. + * When a new structure needs to be used, it is allocated from available ones. + * If there is no structure available, the oldest in use is selected and used if and + * only if it was first allocated more than MAP_IP4_REASS_LIFETIME seconds ago. + * In case no structure can be allocated, the fragment is dropped. + */ + +#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */ +#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0) +#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures +#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048 + +#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragment per reassembly + +#define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */ +#define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0) +#define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures +#define MAP_IP6_REASS_BUFFERS_DEFAULT 2048 + +#define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 + +#define MAP_IP6_REASS_COUNT_BYTES +#define MAP_IP4_REASS_COUNT_BYTES + +//#define IP6_MAP_T_OVERRIDE_TOS 0 + +/* + * This structure _MUST_ be no larger than a single cache line (64 bytes). + * If more space is needed make a union of ip6_prefix and *rules, those are mutually exclusive. 
+ */ +typedef struct { + ip6_address_t ip6_src; + ip6_address_t ip6_prefix; + ip6_address_t *rules; + u32 suffix_mask; + ip4_address_t ip4_prefix; + u16 psid_mask; + u16 mtu; + map_domain_flags_e flags; + u8 ip6_prefix_len; + u8 ip6_src_len; + u8 ea_bits_len; + u8 psid_offset; + u8 psid_length; + + /* helpers */ + u8 psid_shift; + u8 suffix_shift; + u8 ea_shift; + + /* not used by forwarding */ + u8 ip4_prefix_len; +} map_domain_t; + +#define MAP_REASS_INDEX_NONE ((u16)0xffff) + +/* + * Hash key, padded out to 16 bytes for fast compare + */ +typedef union { + CLIB_PACKED (struct { + ip4_address_t src; + ip4_address_t dst; + u16 fragment_id; + u8 protocol; + }); + u64 as_u64[2]; + u32 as_u32[4]; +} map_ip4_reass_key_t; + +typedef struct { + map_ip4_reass_key_t key; + f64 ts; +#ifdef MAP_IP4_REASS_COUNT_BYTES + u16 expected_total; + u16 forwarded; +#endif + i32 port; + u16 bucket; + u16 bucket_next; + u16 fifo_prev; + u16 fifo_next; + u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; +} map_ip4_reass_t; + +/* + * MAP domain counters + */ +typedef enum { + /* Simple counters */ + MAP_DOMAIN_IPV4_FRAGMENT = 0, + /* Combined counters */ + MAP_DOMAIN_COUNTER_RX = 0, + MAP_DOMAIN_COUNTER_TX, + MAP_N_DOMAIN_COUNTER +} map_domain_counter_t; + +/* + * main_main_t + */ +typedef union { + CLIB_PACKED (struct { + ip6_address_t src; + ip6_address_t dst; + u32 fragment_id; + u8 protocol; + }); + u64 as_u64[5]; + u32 as_u32[10]; +} map_ip6_reass_key_t; + +typedef struct { + u32 pi; //Cached packet or ~0 + u16 next_data_offset; //The data offset of the additional 20 bytes or ~0 + u8 next_data_len; //Number of bytes ready to be copied (20 if not last fragment) + u8 next_data[20]; //The 20 additional bytes +} map_ip6_fragment_t; + +typedef struct { + map_ip6_reass_key_t key; + f64 ts; +#ifdef MAP_IP6_REASS_COUNT_BYTES + u16 expected_total; + u16 forwarded; +#endif + u16 bucket; //What hash bucket this element is linked in + u16 bucket_next; + u16 fifo_prev; + u16 fifo_next; + ip4_header_t ip4_header; + map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; +} map_ip6_reass_t; + +typedef struct { + /* pool of MAP domains */ + map_domain_t *domains; + + /* MAP Domain packet/byte counters indexed by map domain index */ + vlib_simple_counter_main_t *simple_domain_counters; + vlib_combined_counter_main_t *domain_counters; + volatile u32 *counter_lock; + + /* Global counters */ + vlib_simple_counter_main_t icmp_relayed; + +#ifdef MAP_SKIP_IP6_LOOKUP + /* pre-presolve */ + u32 adj6_index, adj4_index; + ip4_address_t preresolve_ip4; + ip6_address_t preresolve_ip6; +#endif + + /* Traffic class: zero, copy (~0) or fixed value */ + u8 tc; + bool tc_copy; + bool sec_check; + bool sec_check_frag; + + /* ICMPv6 -> ICMPv4 relay parameters */ + ip4_address_t icmp_src_address; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + /* + * IPv4 encap and decap reassembly + */ + //Conf + f32 ip4_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) + u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures + u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms + u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly + + //Runtime + map_ip4_reass_t *ip4_reass_pool; + u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len + u16 ip4_reass_allocated; + u16 *ip4_reass_hash_table; + u16 ip4_reass_fifo_last; + volatile u32 *ip4_reass_lock; + + //Counters + u32 ip4_reass_buffered_counter; + + /* + * 
IPv6 decap reassembly + */ + //Conf + f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) + u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures + u16 ip6_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms + u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly + + //Runtime + map_ip6_reass_t *ip6_reass_pool; + u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len + u16 ip6_reass_allocated; + u16 *ip6_reass_hash_table; + u16 ip6_reass_fifo_last; + volatile u32 *ip6_reass_lock; + + //Counters + u32 ip6_reass_buffered_counter; + +} map_main_t; + +/* + * TODO: Remove SEC_CHECK / TRANSLATED_4TO6 / TRANSLATED_6TO4 + */ +#define foreach_map_error \ + /* Must be first. */ \ + _(NONE, "valid MAP packets") \ + _(BAD_PROTOCOL, "bad protocol") \ + _(WRONG_ICMP_TYPE, "wrong icmp type") \ + _(SEC_CHECK, "security check failed") \ + _(ENCAP_SEC_CHECK, "encap security check failed") \ + _(DECAP_SEC_CHECK, "decap security check failed") \ + _(ICMP, "unable to translate ICMP") \ + _(ICMP_RELAY, "unable to relay ICMP") \ + _(UNKNOWN, "unknown") \ + _(NO_DOMAIN, "no domain") \ + _(FRAGMENTED, "packet is a fragment") \ + _(FRAGMENT_MEMORY, "could not cache fragment") \ + _(FRAGMENT_MALFORMED, "fragment has unexpected format")\ + _(FRAGMENT_DROPPED, "dropped cached fragment") \ + _(MALFORMED, "malformed packet") + +typedef enum { +#define _(sym,str) MAP_ERROR_##sym, + foreach_map_error +#undef _ + MAP_N_ERROR, + } map_error_t; + +u64 map_error_counter_get(u32 node_index, map_error_t map_error); + +typedef struct { + u32 map_domain_index; + u16 port; +} map_trace_t; + +map_main_t map_main; + +vlib_node_registration_t ip4_map_node; +vlib_node_registration_t ip6_map_node; + +vlib_node_registration_t ip4_map_t_node; +vlib_node_registration_t ip4_map_t_fragmented_node; +vlib_node_registration_t ip4_map_t_tcp_udp_node; +vlib_node_registration_t ip4_map_t_icmp_node; + +vlib_node_registration_t ip6_map_t_node; +vlib_node_registration_t ip6_map_t_fragmented_node; +vlib_node_registration_t ip6_map_t_tcp_udp_node; +vlib_node_registration_t ip6_map_t_icmp_node; + +/* + * map_get_pfx + */ +static_always_inline u64 +map_get_pfx (map_domain_t *d, u32 addr, u16 port) +{ + u16 psid = (port >> d->psid_shift) & d->psid_mask; + + if (d->ea_bits_len == 0 && d->rules) + return clib_net_to_host_u64(d->rules[psid].as_u64[0]); + + u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask; + u64 ea = d->ea_bits_len == 0 ? 0 : (((u64) suffix << d->psid_length)) | psid; + + return clib_net_to_host_u64(d->ip6_prefix.as_u64[0]) | ea << d->ea_shift; +} + +static_always_inline u64 +map_get_pfx_net (map_domain_t *d, u32 addr, u16 port) +{ + return clib_host_to_net_u64(map_get_pfx(d, clib_net_to_host_u32(addr), + clib_net_to_host_u16(port))); +} + +/* + * map_get_sfx + */ +static_always_inline u64 +map_get_sfx (map_domain_t *d, u32 addr, u16 port) +{ + u16 psid = (port >> d->psid_shift) & d->psid_mask; + + /* Shared 1:1 mode. 
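With ea_bits_len == 0 and an explicit rule table, the PSID extracted from the port indexes d->rules directly.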
*/ + if (d->ea_bits_len == 0 && d->rules) + return clib_net_to_host_u64(d->rules[psid].as_u64[1]); + if (d->ip6_prefix_len == 128) + return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]); + + /* IPv4 prefix */ + if (d->flags & MAP_DOMAIN_PREFIX) + return (u64) (addr & ~d->suffix_mask) << 16; + + /* Shared or full IPv4 address */ + return ((u64) addr << 16) | psid; +} + +static_always_inline u64 +map_get_sfx_net (map_domain_t *d, u32 addr, u16 port) +{ + return clib_host_to_net_u64(map_get_sfx(d, clib_net_to_host_u32(addr), + clib_net_to_host_u16(port))); +} + +static_always_inline u32 +map_get_ip4 (ip6_address_t *addr) +{ + return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16); +} + +/* + * Get the MAP domain from an IPv4 lookup adjacency. + */ +static_always_inline map_domain_t * +ip4_map_get_domain (u32 adj_index, u32 *map_domain_index) +{ + map_main_t *mm = &map_main; + ip_lookup_main_t *lm = &ip4_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm, adj_index); + ASSERT(adj); + uword *p = (uword *)adj->rewrite_data; + ASSERT(p); + *map_domain_index = p[0]; + return pool_elt_at_index(mm->domains, p[0]); +} + +/* + * Get the MAP domain from an IPv6 lookup adjacency. + * If the IPv6 address or prefix is not shared, no lookup is required. + * The IPv4 address is used otherwise. + */ +static_always_inline map_domain_t * +ip6_map_get_domain (u32 adj_index, ip4_address_t *addr, + u32 *map_domain_index, u8 *error) +{ + map_main_t *mm = &map_main; + ip4_main_t *im4 = &ip4_main; + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm6, adj_index); + ASSERT(adj); + uword *p = (uword *)adj->rewrite_data; + ASSERT(p); + *map_domain_index = p[0]; + if (p[0] != ~0) + return pool_elt_at_index(mm->domains, p[0]); + + u32 ai = ip4_fib_lookup_with_table(im4, 0, addr, 0); + ip_adjacency_t *adj4 = ip_get_adjacency (lm4, ai); + if (PREDICT_TRUE(adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP || + adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP_T)) { + uword *p = (uword *)adj4->rewrite_data; + *map_domain_index = p[0]; + return pool_elt_at_index(mm->domains, *map_domain_index); + } + *error = MAP_ERROR_NO_DOMAIN; + return NULL; +} + +map_ip4_reass_t * +map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id, + u8 protocol, u32 **pi_to_drop); +void +map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop); + +#define map_ip4_reass_lock() while (__sync_lock_test_and_set(map_main.ip4_reass_lock, 1)) {} +#define map_ip4_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip4_reass_lock = 0;} while(0) + +static_always_inline void +map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi) +{ + int i; + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if(r->fragments[i] != ~0) { + vec_add1(*pi, r->fragments[i]); + r->fragments[i] = ~0; + map_main.ip4_reass_buffered_counter--; + } +} + +int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi); + +map_ip6_reass_t * +map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id, + u8 protocol, u32 **pi_to_drop); +void +map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop); + +#define map_ip6_reass_lock() while (__sync_lock_test_and_set(map_main.ip6_reass_lock, 1)) {} +#define map_ip6_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip6_reass_lock = 0;} while(0) + +int +map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi, + u16 data_offset, u16 next_data_offset, + u8 *data_start, u16 data_len); + +void map_ip4_drop_pi(u32 pi); + 
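/*
 * Illustrative sketch (not part of this commit): one way a data-plane node
 * could drive the IPv4 virtual-reassembly helpers declared above. The
 * function name and the -2 "fragment cached" return convention are
 * hypothetical; the lock, lookup and caching calls are the ones defined in
 * this header. Buffer indices of expired reassemblies are appended to
 * *fragments_to_drop and must be dropped by the caller.
 */
static_always_inline i32
example_ip4_reass_port (ip4_header_t *ip4, u32 pi, u32 **fragments_to_drop)
{
  i32 port = -1;
  map_ip4_reass_lock();
  map_ip4_reass_t *r = map_ip4_reass_get(ip4->src_address.as_u32,
                                         ip4->dst_address.as_u32,
                                         ip4->fragment_id, ip4->protocol,
                                         fragments_to_drop);
  if (r) {
    if (r->port >= 0)
      port = r->port;                        //First fragment already seen
    else if (map_ip4_reass_add_fragment(r, pi) == 0)
      port = -2;                             //Cached until the first fragment arrives
  }
  map_ip4_reass_unlock();
  return port;                               //-1: no state could be kept, drop the fragment
}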
+int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100 +int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff) +int map_ip4_reass_conf_lifetime(u16 lifetime_ms); +#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff +int map_ip4_reass_conf_buffers(u32 buffers); +#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff) + +void map_ip6_drop_pi(u32 pi); + + +int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP6_REASS_CONF_HT_RATIO_MAX 100 +int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP6_REASS_CONF_POOL_SIZE_MAX (0xfeff) +int map_ip6_reass_conf_lifetime(u16 lifetime_ms); +#define MAP_IP6_REASS_CONF_LIFETIME_MAX 0xffff +int map_ip6_reass_conf_buffers(u32 buffers); +#define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff) + +static_always_inline +int ip6_parse(const ip6_header_t *ip6, u32 buff_len, + u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset) +{ + if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { + *l4_protocol = ((ip6_frag_hdr_t *)(ip6 + 1))->next_hdr; + *frag_hdr_offset = sizeof(*ip6); + *l4_offset = sizeof(*ip6) + sizeof(ip6_frag_hdr_t); + } else { + *l4_protocol = ip6->protocol; + *frag_hdr_offset = 0; + *l4_offset = sizeof(*ip6); + } + + return (buff_len < (*l4_offset + 4)) || + (clib_net_to_host_u16(ip6->payload_length) < (*l4_offset + 4 - sizeof(*ip6))); +} + + +#define u8_ptr_add(ptr, index) (((u8 *)ptr) + index) +#define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val)) + +#define frag_id_6to4(id) ((id) ^ ((id) >> 16)) + +static_always_inline void +ip4_map_t_embedded_address (map_domain_t *d, + ip6_address_t *ip6, const ip4_address_t *ip4) +{ + ASSERT(d->ip6_src_len == 96); //No support for other lengths for now + ip6->as_u64[0] = d->ip6_src.as_u64[0]; + ip6->as_u32[2] = d->ip6_src.as_u32[2]; + ip6->as_u32[3] = ip4->as_u32; +} + +static_always_inline u32 +ip6_map_t_embedded_address (map_domain_t *d, ip6_address_t *addr) +{ + ASSERT(d->ip6_src_len == 96); //No support for other lengths for now + return addr->as_u32[3]; +} + +static inline void +map_domain_counter_lock (map_main_t *mm) +{ + if (mm->counter_lock) + while (__sync_lock_test_and_set(mm->counter_lock, 1)) + /* zzzz */ ; +} +static inline void +map_domain_counter_unlock (map_main_t *mm) +{ + if (mm->counter_lock) + *mm->counter_lock = 0; +} + + +static_always_inline void +map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector, + vlib_node_runtime_t *node, vlib_error_t *error, + u32 next) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + //Deal with fragments that are ready + from = pi_vector; + n_left_from = vec_len(pi_vector); + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + vlib_buffer_t *p0 = vlib_get_buffer(vm, pi0); + p0->error = *error; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } +} diff --git a/vnet/vnet/map/map_doc.md b/vnet/vnet/map/map_doc.md new file mode 100644 index 00000000000..230c52dfafd --- /dev/null +++ b/vnet/vnet/map/map_doc.md @@ -0,0 
+1,69 @@ +# VPP MAP and Lw4o6 implementation + +This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations. +Everything that is not directly obvious should come here. + + + +## MAP-E Virtual Reassembly + +The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some metadata is kept about the first fragment and reused for subsequent fragments. + +Fragment caching and handling is not always necessary. It is performed when: +* An IPv4 fragment is received and the destination IPv4 address is shared. +* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on. +* An IPv6 fragment is received. + +There are 3 dedicated nodes: +* ip4-map-reass +* ip6-map-ip4-reass +* ip6-map-ip6-reass + +ip4-map sends all fragments to ip4-map-reass. +ip6-map sends all inner-fragments to ip6-map-ip4-reass. +ip6-map sends all outer-fragments to ip6-map-ip6-reass. + +IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. the IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes. + +An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received. + +#### Virtual Reassembly configuration + +IPv4 and IPv6 virtual reassembly support the following configuration: + map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>] + +lifetime: + The time in milliseconds a reassembly structure is considered valid. The longer it is, the more reliable reassembly becomes, but the more likely the pool of reassembly structures is to be exhausted. The IPv4 standard suggests a lifetime of 15 seconds and IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases. + +buffers: + The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool. + +pool-size: + The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total. + +ht-ratio: + The number of buckets in the hash-table is pool-size * ht-ratio. + + +Any time pool-size or ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost. + + +##### Additional considerations + +Reassembly at high rates is expensive in terms of buffers. There is a trade-off between the lifetime and the number of allocated buffers. Reducing the lifetime helps, but at the cost of losing state for fragments that arrive far apart in time. + +Let: +R be the packet rate at which fragments are received. +F be the number of fragments per packet. + +Assuming the first fragment is always received last, 
we should have: +buffers > lifetime * R / F * (F - 1) +pool-size > lifetime * R / F + +This is a worst case: packets complete at a rate of R / F reassemblies per second, and each of them keeps up to F - 1 fragments cached and one reassembly structure allocated for up to the configured lifetime. Receiving the first fragment earlier helps reduce the number of required buffers. Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reduce 'pool-size'. + +But the formula shows that it is challenging to forward a significant amount of fragmented packets at high rates. For instance, with a lifetime of 1 second, a 5Mpps packet rate would require buffering up to 2.5 million fragments. + +If you want to do that, be prepared to configure a correspondingly large number of buffers. + + diff --git a/vnet/vnet/map/sixrd.c b/vnet/vnet/map/sixrd.c new file mode 100644 index 00000000000..26b4eea9a86 --- /dev/null +++ b/vnet/vnet/map/sixrd.c @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sixrd.h" + +/* + * This code supports the following sixrd modes: + * + * 32 EA bits (Complete IPv4 address is embedded): + * ea_bits_len = 32 + * IPv4 suffix is embedded: + * ea_bits_len = < 32 + * No embedded address bits (1:1 mode): + * ea_bits_len = 0 + */ + +int +sixrd_create_domain (ip6_address_t *ip6_prefix, + u8 ip6_prefix_len, + ip4_address_t *ip4_prefix, + u8 ip4_prefix_len, + ip4_address_t *ip4_src, + u32 *sixrd_domain_index, + u16 mtu) +{ + sixrd_main_t *mm = &sixrd_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + sixrd_domain_t *d; + ip_adjacency_t adj; + ip4_add_del_route_args_t args4; + ip6_add_del_route_args_t args6; + u32 *p; + + /* Get domain index */ + pool_get_aligned(mm->domains, d, CLIB_CACHE_LINE_BYTES); + memset(d, 0, sizeof (*d)); + *sixrd_domain_index = d - mm->domains; + + /* Init domain struct */ + d->ip4_prefix.as_u32 = ip4_prefix->as_u32; + d->ip4_prefix_len = ip4_prefix_len; + d->ip6_prefix = *ip6_prefix; + d->ip6_prefix_len = ip6_prefix_len; + d->ip4_src = *ip4_src; + d->mtu = mtu; + + if (ip4_prefix_len < 32) + d->shift = 64 - ip6_prefix_len + (32 - ip4_prefix_len); + + /* Init IP adjacency */ + memset(&adj, 0, sizeof(adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_SIXRD; + p = (u32 *)&adj.rewrite_data[0]; + *p = (u32) (*sixrd_domain_index); + + /* Create ip6 adjacency */ + memset(&args6, 0, sizeof(args6)); + args6.table_index_or_table_id = 0; + args6.flags = IP6_ROUTE_FLAG_ADD; + args6.dst_address.as_u64[0] = ip6_prefix->as_u64[0]; + args6.dst_address.as_u64[1] = ip6_prefix->as_u64[1]; + args6.dst_address_length = ip6_prefix_len; + args6.adj_index = ~0; + args6.add_adj = &adj; + args6.n_add_adj = 1; + ip6_add_del_route(im6, &args6); + + /* Multiple SIXRD domains may share same source IPv4 TEP */ + uword *q = ip4_get_route(im4, 0, 0, (u8 *)ip4_src, 32); + if (q) { + u32 ai = q[0]; + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_adjacency_t *adj4 = 
ip_get_adjacency(lm4, ai); + if (adj4->lookup_next_index != IP_LOOKUP_NEXT_SIXRD) { + clib_warning("BR source address already assigned: %U", format_ip4_address, ip4_src); + pool_put(mm->domains, d); + return -1; + } + /* Shared source */ + p = (u32 *)&adj4->rewrite_data[0]; + p[0] = ~0; + + /* Add refcount, so we don't accidentially delete the route underneath someone */ + p[1]++; + } else { + /* Create ip4 adjacency. */ + memset(&args4, 0, sizeof(args4)); + args4.table_index_or_table_id = 0; + args4.flags = IP4_ROUTE_FLAG_ADD; + args4.dst_address.as_u32 = ip4_src->as_u32; + args4.dst_address_length = 32; + args4.adj_index = ~0; + args4.add_adj = &adj; + args4.n_add_adj = 1; + ip4_add_del_route(im4, &args4); + } + + return 0; +} + +/* + * sixrd_delete_domain + */ +int +sixrd_delete_domain (u32 sixrd_domain_index) +{ + sixrd_main_t *mm = &sixrd_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + sixrd_domain_t *d; + ip_adjacency_t adj; + ip4_add_del_route_args_t args4; + ip6_add_del_route_args_t args6; + + if (pool_is_free_index(mm->domains, sixrd_domain_index)) { + clib_warning("SIXRD domain delete: domain does not exist: %d", sixrd_domain_index); + return -1; + } + + d = pool_elt_at_index(mm->domains, sixrd_domain_index); + + memset(&adj, 0, sizeof(adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_SIXRD; + + /* Delete ip6 adjacency */ + memset(&args6, 0, sizeof (args6)); + args6.table_index_or_table_id = 0; + args6.flags = IP6_ROUTE_FLAG_DEL; + args6.dst_address.as_u64[0] = d->ip6_prefix.as_u64[0]; + args6.dst_address.as_u64[1] = d->ip6_prefix.as_u64[1]; + args6.dst_address_length = d->ip6_prefix_len; + args6.adj_index = 0; + args6.add_adj = &adj; + args6.n_add_adj = 0; + ip6_add_del_route(im6, &args6); + + /* Delete ip4 adjacency */ + uword *q = ip4_get_route(im4, 0, 0, (u8 *)&d->ip4_src, 32); + if (q) { + u32 ai = q[0]; + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_adjacency_t *adj4 = ip_get_adjacency(lm4, ai); + + u32 *p = (u32 *)&adj4->rewrite_data[0]; + /* Delete route when no other domains use this source */ + if (p[1] == 0) { + memset(&args4, 0, sizeof(args4)); + args4.table_index_or_table_id = 0; + args4.flags = IP4_ROUTE_FLAG_DEL; + args4.dst_address.as_u32 = d->ip4_prefix.as_u32; + args4.dst_address_length = d->ip4_prefix_len; + args4.adj_index = 0; + args4.add_adj = &adj; + args4.n_add_adj = 0; + ip4_add_del_route(im4, &args4); + } + p[1]--; + } + + pool_put(mm->domains, d); + + return 0; +} + +static clib_error_t * +sixrd_add_domain_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4_prefix; + ip6_address_t ip6_prefix; + ip4_address_t ip4_src; + u32 ip6_prefix_len, ip4_prefix_len, sixrd_domain_index; + u32 num_m_args = 0; + /* Optional arguments */ + u32 mtu = 0; + + /* Get a line of input. 
*/ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, &ip6_prefix_len)) + num_m_args++; + else if (unformat(line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, &ip4_prefix_len)) + num_m_args++; + else if (unformat(line_input, "ip4-src %U", unformat_ip4_address, &ip4_src)) + num_m_args++; + else if (unformat(line_input, "mtu %d", &mtu)) + num_m_args++; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args < 3) + return clib_error_return(0, "mandatory argument(s) missing"); + + sixrd_create_domain(&ip6_prefix, ip6_prefix_len, &ip4_prefix, ip4_prefix_len, + &ip4_src, &sixrd_domain_index, mtu); + + return 0; +} + +static clib_error_t * +sixrd_del_domain_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 num_m_args = 0; + u32 sixrd_domain_index; + + /* Get a line of input. */ + if (! unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "index %d", &sixrd_domain_index)) + num_m_args++; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args != 1) + return clib_error_return(0, "mandatory argument(s) missing"); + + sixrd_delete_domain(sixrd_domain_index); + + return 0; +} + +static u8 * +format_sixrd_domain (u8 *s, va_list *args) +{ + sixrd_domain_t *d = va_arg(*args, sixrd_domain_t *); + sixrd_main_t *mm = &sixrd_main; + + s = format(s, + "[%d] ip6-pfx %U/%d ip4-pfx %U/%d ip4-src %U mtu %d", + d - mm->domains, + format_ip6_address, &d->ip6_prefix, d->ip6_prefix_len, + format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len, + format_ip4_address, &d->ip4_src, d->mtu); + + return s; +} + +static clib_error_t * +show_sixrd_domain_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + sixrd_main_t *mm = &sixrd_main; + sixrd_domain_t *d; + + if (pool_elts(mm->domains) == 0) + vlib_cli_output(vm, "No SIXRD domains are configured..."); + + pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_sixrd_domain, d);})); + + return 0; + +} + +static clib_error_t * +show_sixrd_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + sixrd_main_t *mm = &sixrd_main; + sixrd_domain_t *d; + int domains = 0, domaincount = 0; + if (pool_elts (mm->domains) == 0) + vlib_cli_output (vm, "No SIXRD domains are configured..."); + + pool_foreach(d, mm->domains, ({ + domains += sizeof(*d); + domaincount++; + })); + + vlib_cli_output(vm, "SIXRD domains structure: %d\n", sizeof (sixrd_domain_t)); + vlib_cli_output(vm, "SIXRD domains: %d (%d bytes)\n", domaincount, domains); + + return 0; +} + +/* + * packet trace format function + */ +u8 * +format_sixrd_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + sixrd_trace_t *t = va_arg (*args, sixrd_trace_t *); + u32 sixrd_domain_index = t->sixrd_domain_index; + + s = format(s, "SIXRD domain index: %d", sixrd_domain_index); + + return s; +} + +VLIB_CLI_COMMAND(sixrd_add_domain_command, static) = { + .path = "sixrd add domain", + 
.short_help = + "sixrd add domain ip6-pfx <ip6-pfx> ip4-pfx <ip4-pfx> ip4-src <ip4-addr>", + .function = sixrd_add_domain_command_fn, +}; + +VLIB_CLI_COMMAND(sixrd_del_command, static) = { + .path = "sixrd del domain", + .short_help = + "sixrd del domain index <domain>", + .function = sixrd_del_domain_command_fn, +}; + +VLIB_CLI_COMMAND(show_sixrd_domain_command, static) = { + .path = "show sixrd domain", + .function = show_sixrd_domain_command_fn, +}; + +VLIB_CLI_COMMAND(show_sixrd_stats_command, static) = { + .path = "show sixrd stats", + .function = show_sixrd_stats_command_fn, +}; + +/* + * sixrd_init + */ +clib_error_t *sixrd_init (vlib_main_t *vm) +{ + sixrd_main_t *mm = &sixrd_main; + + mm->vnet_main = vnet_get_main(); + mm->vlib_main = vm; + + return 0; +} + +VLIB_INIT_FUNCTION(sixrd_init); diff --git a/vnet/vnet/map/sixrd.h b/vnet/vnet/map/sixrd.h new file mode 100644 index 00000000000..d741cb278b5 --- /dev/null +++ b/vnet/vnet/map/sixrd.h @@ -0,0 +1,144 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +#include <stdbool.h> +#include <vppinfra/error.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> + +vlib_node_registration_t ip6_sixrd_node; +vlib_node_registration_t ip4_sixrd_node; + +int sixrd_create_domain(ip6_address_t *ip6_prefix, u8 ip6_prefix_len, + ip4_address_t *ip4_prefix, u8 ip4_prefix_len, + ip4_address_t *ip4_src, u32 *sixrd_domain_index, u16 mtu); +int sixrd_delete_domain(u32 sixrd_domain_index); +u8 *format_sixrd_trace(u8 *s, va_list *args); + +typedef struct { + ip6_address_t ip6_prefix; + ip4_address_t ip4_prefix; + ip4_address_t ip4_src; + u8 ip6_prefix_len; + u8 ip4_prefix_len; + + /* helpers */ + u8 shift; + + u16 mtu; +} sixrd_domain_t; + +typedef struct { + /* pool of SIXRD domains */ + sixrd_domain_t *domains; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} sixrd_main_t; + +#define foreach_sixrd_error \ + /* Must be first. 
*/ \ + _(NONE, "valid SIXRD packets") \ + _(BAD_PROTOCOL, "bad protocol") \ + _(WRONG_ICMP_TYPE, "wrong icmp type") \ + _(SEC_CHECK, "security check failed") \ + _(ICMP, "unable to translate ICMP") \ + _(UNKNOWN, "unknown") \ + _(NO_DOMAIN, "no domain") \ + _(ENCAPSULATED, "encapsulated") \ + _(DECAPSULATED, "decapsulated") \ + _(TRANSLATED_4TO6, "translated 4 to 6") \ + _(TRANSLATED_6TO4, "translated 6 to 4") \ + _(FRAGMENT, "fragment handling error") \ + _(FRAGMENT_QUEUED, "dropped, missing first fragment") \ + _(FRAGMENTED, "packets requiring fragmentation") \ + _(FRAGMENT_PARTS, "fragment parts") \ + _(MALFORMED, "malformed packet") + +typedef enum { +#define _(sym,str) SIXRD_ERROR_##sym, + foreach_sixrd_error +#undef _ + SIXRD_N_ERROR, + } sixrd_error_t; + +typedef struct { + u32 sixrd_domain_index; +} sixrd_trace_t; + +sixrd_main_t sixrd_main; + +/* + * sixrd_get_addr + */ +static_always_inline u32 +sixrd_get_addr (sixrd_domain_t *d, u64 dal) +{ + + /* 1:1 mode */ + if (d->ip4_prefix_len == 32) return (d->ip4_prefix.as_u32); + + /* Grab 32 - ip4_prefix_len bits out of IPv6 address from offset ip6_prefix_len */ + return (d->ip4_prefix.as_u32 | (u32)(dal >> d->shift)); +} + +/* + * Get the SIXRD domain from an IPv6 lookup adjacency. + */ +static_always_inline sixrd_domain_t * +ip6_sixrd_get_domain (u32 adj_index, u32 *sixrd_domain_index) +{ + sixrd_main_t *mm = &sixrd_main; + ip_lookup_main_t *lm = &ip6_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm, adj_index); + ASSERT(adj); + uword *p = (uword *)adj->rewrite_data; + ASSERT(p); + *sixrd_domain_index = p[0]; + return pool_elt_at_index(mm->domains, p[0]); +} + +/* + * Get the SIXRD domain from an IPv4 lookup adjacency. + * If the IPv4 address is not shared, no lookup is required. + * The IPv6 address is used otherwise. + */ +static_always_inline sixrd_domain_t * +ip4_sixrd_get_domain (u32 adj_index, ip6_address_t *addr, + u32 *sixrd_domain_index, u8 *error) +{ + sixrd_main_t *mm = &sixrd_main; + ip6_main_t *im6 = &ip6_main; + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm4, adj_index); + ASSERT(adj); + uword *p = (uword *)adj->rewrite_data; + ASSERT(p); + *sixrd_domain_index = p[0]; + if (p[0] != ~0) + return pool_elt_at_index(mm->domains, p[0]); + + u32 ai = ip6_fib_lookup_with_table(im6, 0, addr); + ip_adjacency_t *adj6 = ip_get_adjacency (lm6, ai); + if (PREDICT_TRUE(adj6->lookup_next_index == IP_LOOKUP_NEXT_SIXRD)) { + uword *p = (uword *)adj6->rewrite_data; + *sixrd_domain_index = p[0]; + return pool_elt_at_index(mm->domains, *sixrd_domain_index); + } + *error = SIXRD_ERROR_NO_DOMAIN; + return NULL; +} diff --git a/vnet/vnet/mcast/mcast.c b/vnet/vnet/mcast/mcast.c new file mode 100644 index 00000000000..e9177c71f2c --- /dev/null +++ b/vnet/vnet/mcast/mcast.c @@ -0,0 +1,563 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/mcast/mcast.h> + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/ip/ip4.h> + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 group_index; +} mcast_prep_trace_t; + +/* packet trace format function */ +static u8 * format_mcast_prep_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mcast_prep_trace_t * t = va_arg (*args, mcast_prep_trace_t *); + + s = format (s, "MCAST_PREP: group %d, next index %d, tx_sw_if_index %d", + t->group_index, t->next_index, t->sw_if_index); + return s; +} + +mcast_main_t mcast_main; +vlib_node_registration_t mcast_prep_node; +vlib_node_registration_t mcast_recycle_node; + +#define foreach_mcast_prep_error \ +_(MCASTS, "Multicast Packets") + +typedef enum { +#define _(sym,str) MCAST_PREP_ERROR_##sym, + foreach_mcast_prep_error +#undef _ + MCAST_PREP_N_ERROR, +} mcast_prep_error_t; + +static char * mcast_prep_error_strings[] = { +#define _(sym,string) string, + foreach_mcast_prep_error +#undef _ +}; + +typedef enum { + MCAST_PREP_NEXT_DROP, + MCAST_PREP_N_NEXT, +} mcast_prep_next_t; + +static uword +mcast_prep_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + mcast_prep_next_t next_index; + mcast_main_t * mcm = &mcast_main; + vlib_node_t *n = vlib_get_node (vm, mcast_prep_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = 0; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + next1 = 0; + + /* $$$$ your message in this space. 
Process 2 x pkts */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + mcast_prep_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + mcast_prep_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0, adj_index0; + mcast_group_t * g0; + ip_adjacency_t * adj0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = ip_get_adjacency (lm, adj_index0); + vnet_buffer(b0)->mcast.mcast_group_index = adj0->mcast_group_index; + g0 = pool_elt_at_index (mcm->groups, adj0->mcast_group_index); + + /* + * Handle the degenerate single-copy case + * If we don't change the freelist, the packet will never + * make it to the recycle node... + */ + if (PREDICT_TRUE(vec_len (g0->members) > 1)) + { + /* Save the original free list index */ + vnet_buffer(b0)->mcast.original_free_list_index = + b0->free_list_index; + + /* Swap in the multicast recycle list */ + b0->free_list_index = mcm->mcast_recycle_list_index; + + /* + * Make sure that intermediate "frees" don't screw up + */ + b0->clone_count = vec_len (g0->members); + + /* Set up for the recycle node */ + vnet_buffer(b0)->mcast.mcast_current_index = 1; + } + + /* Transmit the pkt on the first interface */ + next0 = g0->members[0].prep_and_recycle_node_next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + g0->members[0].tx_sw_if_index; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + mcast_prep_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + t->group_index = vnet_buffer(b0)->mcast.mcast_group_index; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + em->counters[node_counter_base_index + MCAST_PREP_ERROR_MCASTS] += + frame->n_vectors; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (mcast_prep_node) = { + .function = mcast_prep_node_fn, + .name = "mcast_prep", + .vector_size = sizeof (u32), + .format_trace = format_mcast_prep_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(mcast_prep_error_strings), + .error_strings = mcast_prep_error_strings, + + .n_next_nodes = MCAST_PREP_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [MCAST_PREP_NEXT_DROP] = "error-drop", + }, +}; + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 current_member; + u32 group_index; +} mcast_recycle_trace_t; + +static u8 * format_mcast_recycle_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mcast_recycle_trace_t * t = va_arg (*args, 
mcast_recycle_trace_t *); + + s = format (s, +"MCAST_R: group %d, current member %d next (node) index %d, tx_sw_if_index %d", + t->group_index, t->current_member, t->next_index, t->sw_if_index); + return s; +} + +#define foreach_mcast_recycle_error \ +_(RECYCLES, "Multicast Recycles") + +typedef enum { +#define _(sym,str) MCAST_RECYCLE_ERROR_##sym, + foreach_mcast_recycle_error +#undef _ + MCAST_RECYCLE_N_ERROR, +} mcast_recycle_error_t; + +static char * mcast_recycle_error_strings[] = { +#define _(sym,string) string, + foreach_mcast_recycle_error +#undef _ +}; + +typedef enum { + MCAST_RECYCLE_NEXT_DROP, + MCAST_RECYCLE_N_NEXT, +} mcast_recycle_next_t; + +static uword +mcast_recycle_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + mcast_recycle_next_t next_index; + mcast_main_t * mcm = &mcast_main; + vlib_node_t *n = vlib_get_node (vm, mcast_recycle_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = 0; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + next1 = 0; + + /* $$$$ your message in this space. Process 2 x pkts */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + mcast_recycle_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + mcast_recycle_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 current_member0; + mcast_group_t * g0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + g0 = pool_elt_at_index (mcm->groups, + vnet_buffer(b0)->mcast.mcast_group_index); + + /* No more replicas? 
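Only one copy left, so restore the original free list: after this last transmit the buffer must return to its normal pool instead of looping through the recycle list again.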
*/ + if (b0->clone_count == 1) + { + /* Restore the original free list index */ + b0->free_list_index = + vnet_buffer(b0)->mcast.original_free_list_index; + } + current_member0 = vnet_buffer(b0)->mcast.mcast_current_index; + + next0 = + g0->members[current_member0].prep_and_recycle_node_next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + g0->members[current_member0].tx_sw_if_index; + + vnet_buffer(b0)->mcast.mcast_current_index = + current_member0 + 1; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + mcast_recycle_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + t->group_index = vnet_buffer(b0)->mcast.mcast_group_index; + t->current_member = current_member0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + em->counters[node_counter_base_index + MCAST_RECYCLE_ERROR_RECYCLES] += + frame->n_vectors; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (mcast_recycle_node) = { + .function = mcast_recycle_node_fn, + .name = "mcast-recycle", + .vector_size = sizeof (u32), + .format_trace = format_mcast_recycle_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(mcast_recycle_error_strings), + .error_strings = mcast_recycle_error_strings, + + .n_next_nodes = MCAST_RECYCLE_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [MCAST_RECYCLE_NEXT_DROP] = "error-drop", + }, +}; + +/* + * fish pkts back from the recycle queue/freelist + * un-flatten the context chains + */ +static void mcast_recycle_callback (vlib_main_t *vm, + vlib_buffer_free_list_t * fl) +{ + vlib_frame_t * f = 0; + u32 n_left_from; + u32 n_left_to_next = 0; + u32 n_this_frame = 0; + u32 * from; + u32 * to_next; + u32 bi0, pi0; + vlib_buffer_t *b0; + vlib_buffer_t *bnext0; + int i; + + /* aligned, unaligned buffers */ + for (i = 0; i < 2; i++) + { + if (i == 0) + { + from = fl->aligned_buffers; + n_left_from = vec_len (from); + } + else + { + from = fl->unaligned_buffers; + n_left_from = vec_len (from); + } + + while (n_left_from > 0) + { + if (PREDICT_FALSE(n_left_to_next == 0)) + { + if (f) + { + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, mcast_recycle_node.index, f); + } + + f = vlib_get_frame_to_node (vm, mcast_recycle_node.index); + to_next = vlib_frame_vector_args (f); + n_left_to_next = VLIB_FRAME_SIZE; + n_this_frame = 0; + } + + bi0 = from[0]; + if (PREDICT_TRUE(n_left_from > 1)) + { + pi0 = from[1]; + vlib_prefetch_buffer_with_index(vm,pi0,LOAD); + } + + bnext0 = b0 = vlib_get_buffer (vm, bi0); + + while (bnext0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + from += 1; + n_left_from -= 1; + bnext0 = vlib_get_buffer (vm, bnext0->next_buffer); + } + to_next[0] = bi0; + + if (CLIB_DEBUG > 0) + vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED); + + from++; + to_next++; + n_this_frame++; + n_left_to_next--; + n_left_from--; + } + } + + vec_reset_length (fl->aligned_buffers); + vec_reset_length (fl->unaligned_buffers); + + if (f) + { + ASSERT(n_this_frame); + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, mcast_recycle_node.index, f); + } +} + +clib_error_t *mcast_init (vlib_main_t *vm) +{ + mcast_main_t * mcm = &mcast_main; + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * fl; + + 
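/* Init wires up a dedicated "mcast-recycle" free list: buffers freed onto it are handed back to the mcast-recycle node by the callback registered just below, which is how each clone gets re-sent on the next group member. */ + 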
mcm->vlib_main = vm; + mcm->vnet_main = vnet_get_main(); + mcm->mcast_recycle_list_index = + vlib_buffer_create_free_list (vm, 1024 /* fictional */, "mcast-recycle"); + + fl = pool_elt_at_index (bm->buffer_free_list_pool, + mcm->mcast_recycle_list_index); + + fl->buffers_added_to_freelist_function = mcast_recycle_callback; + + return 0; +} + +VLIB_INIT_FUNCTION (mcast_init); + + diff --git a/vnet/vnet/mcast/mcast.h b/vnet/vnet/mcast/mcast.h new file mode 100644 index 00000000000..96e514427c6 --- /dev/null +++ b/vnet/vnet/mcast/mcast.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_vnet_mcast_h__ +#define __included_vnet_mcast_h__ + +#include <vnet/vnet.h> +#include <vlib/buffer.h> +#include <vlib/buffer_funcs.h> + +typedef struct { + /* Arrange for both prep and recycle nodes to have identical + next indices for a given output interface */ + u32 prep_and_recycle_node_next_index; + + /* Show command, etc. */ + u32 tx_sw_if_index; +} mcast_group_member_t; + +typedef struct { + /* vector of group members */ + mcast_group_member_t * members; +} mcast_group_t; + +typedef struct { + /* pool of multicast (interface) groups */ + mcast_group_t * groups; + + /* multicast "free" list, aka recycle list */ + u32 mcast_recycle_list_index; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} mcast_main_t; + +mcast_main_t mcast_main; + +#endif /* __included_vnet_mcast_h__ */ diff --git a/vnet/vnet/mcast/mcast_test.c b/vnet/vnet/mcast/mcast_test.c new file mode 100644 index 00000000000..4561d7cdc00 --- /dev/null +++ b/vnet/vnet/mcast/mcast_test.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/mcast/mcast.h> + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/lookup.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/ip/ip4.h> +#include <vnet/mcast/mcast.h> + +typedef struct { + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + mcast_main_t * mcast_main; +} mcast_test_main_t; + +mcast_test_main_t mcast_test_main; +vlib_node_registration_t mcast_prep_node; +vlib_node_registration_t mcast_recycle_node; + +static clib_error_t * +mcast_test_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *rewrite_data; + mcast_test_main_t * mtm = &mcast_test_main; + mcast_main_t * mcm = mtm->mcast_main; + ip_adjacency_t adj; + u32 adj_index; + mcast_group_t * g; + mcast_group_member_t * member; + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t dst_addr, zero; + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + pool_get (mcm->groups, g); + memset (g, 0, sizeof (*g)); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + vnet_hw_interface_t *hw; + u32 next, sw_if_index; + + if (unformat (line_input, "%U", unformat_vnet_sw_interface, + mtm->vnet_main, &sw_if_index)) + { + vec_add2 (g->members, member, 1); + member->tx_sw_if_index = sw_if_index; + + hw = vnet_get_sup_hw_interface (mtm->vnet_main, + sw_if_index); + + next = vlib_node_add_next (mtm->vlib_main, + mcast_prep_node.index, + hw->output_node_index); + + /* Required to be the same next index... */ + vlib_node_add_next_with_slot (mtm->vlib_main, + mcast_recycle_node.index, + hw->output_node_index, next); + member->prep_and_recycle_node_next_index = next; + } + else + { + return unformat_parse_error (line_input); + } + } + + if (vec_len (g->members) == 0) + { + pool_put (mcm->groups, g); + vlib_cli_output (vm, "no group members specified"); + return 0; + } + + + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + adj.mcast_group_index = g - mcm->groups; + rewrite_data = format (0, "abcdefg"); + + vnet_rewrite_for_tunnel + (mtm->vnet_main, + (u32)~0, /* tx_sw_if_index, we dont know yet */ + ip4_rewrite_node.index, + mcast_prep_node.index, + &adj.rewrite_header, + rewrite_data, vec_len(rewrite_data)); + + ip_add_adjacency (lm, &adj, 1 /* one adj */, + &adj_index); + + dst_addr.as_u32 = clib_host_to_net_u32 (0x0a000002); + zero.as_u32 = 0; + + ip4_add_del_route_next_hop (im, + IP4_ROUTE_FLAG_ADD, + &dst_addr, + 24 /* mask width */, + &zero /* no next hop */, + + 0, // next hop sw if index + 1, // weight + adj_index, + 0 /* explicit fib 0 */); + + return 0; +} + +static VLIB_CLI_COMMAND (mcast_test_command) = { + .path = "test mc", + .short_help = "test mc", + .function = mcast_test_command_fn, +}; + +clib_error_t *mcast_test_init (vlib_main_t *vm) +{ + mcast_test_main_t * mtm = &mcast_test_main; + + mtm->vlib_main = vm; + mtm->vnet_main = vnet_get_main(); + mtm->mcast_main = &mcast_main; + + return 0; +} + +VLIB_INIT_FUNCTION (mcast_test_init); diff --git a/vnet/vnet/misc.c b/vnet/vnet/misc.c new file mode 100644 index 00000000000..6effe6eae3e --- /dev/null +++ b/vnet/vnet/misc.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * misc.c: vnet misc + * + * Copyright (c) 2012 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> + +vnet_main_t vnet_main; + +vnet_main_t * +vnet_get_main (void) +{ + return &vnet_main; +} + +static uword +vnet_local_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + ASSERT (0); + return f->n_vectors; +} + +VNET_DEVICE_CLASS (vnet_local_interface_device_class,static) = { + .name = "local", + .tx_function = vnet_local_interface_tx, +}; + +VNET_HW_INTERFACE_CLASS (vnet_local_interface_hw_class,static) = { + .name = "local", +}; + +clib_error_t * +vnet_main_init (vlib_main_t * vm) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error; + u32 hw_if_index; + vnet_hw_interface_t * hw; + + if ((error = vlib_call_init_function (vm, vnet_interface_init))) + return error; + + vnm->vlib_main = vm; + + hw_if_index = vnet_register_interface + (vnm, + vnet_local_interface_device_class.index, /* instance */ 0, + vnet_local_interface_hw_class.index, /* instance */ 0); + hw = vnet_get_hw_interface (vnm, hw_if_index); + + vnm->local_interface_hw_if_index = hw_if_index; + vnm->local_interface_sw_if_index = hw->sw_if_index; + + return 0; +} + +VLIB_INIT_FUNCTION (vnet_main_init); diff --git a/vnet/vnet/mpls-gre/error.def b/vnet/vnet/mpls-gre/error.def new file mode 100644 index 00000000000..424ab50a030 --- /dev/null +++ b/vnet/vnet/mpls-gre/error.def @@ -0,0 +1,28 @@ +/* + * mpls_error.def: mpls errors + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +mpls_error (NONE, "no error") +mpls_error (UNKNOWN_PROTOCOL, "unknown protocol") +mpls_error (UNSUPPORTED_VERSION, "unsupported version") +mpls_error (PKTS_DECAP, "MPLS-GRE input packets decapsulated") +mpls_error (PKTS_ENCAP, "MPLS-GRE output packets encapsulated") +mpls_error (NO_LABEL, "MPLS-GRE no label for fib/dst") +mpls_error (TTL_EXPIRED, "MPLS-GRE ttl expired") +mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set") +mpls_error (BAD_LABEL, "invalid FIB id in label") +mpls_error (NOT_IP4, "non-ip4 packets dropped") +mpls_error (DISALLOWED_FIB, "disallowed FIB id") diff --git a/vnet/vnet/mpls-gre/interface.c b/vnet/vnet/mpls-gre/interface.c new file mode 100644 index 00000000000..c345054bdec --- /dev/null +++ b/vnet/vnet/mpls-gre/interface.c @@ -0,0 +1,1930 @@ +/* + * interface.c: mpls interfaces + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls-gre/mpls.h> + +static uword mpls_gre_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + /* + * Conundrum: packets from tun/tap destined for the tunnel + * actually have this rewrite applied. Transit packets do not. + * To make the two cases equivalent, don't generate a + * rewrite here, build the entire header in the fast path. + */ + return 0; +} + +/* manually added to the interface output node */ +#define MPLS_GRE_OUTPUT_NEXT_POST_REWRITE 1 + +static uword +mpls_gre_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + mpls_main_t * gm = &mpls_main; + vnet_main_t * vnm = gm->vnet_main; + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * As long as we have enough pkts left to process two pkts + * and prefetch two pkts... 
+ */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, next0, bi1, next1; + mpls_gre_tunnel_t * t0, * t1; + u32 sw_if_index0, sw_if_index1; + vnet_hw_interface_t * hi0, * hi1; + u8 * dst0, * dst1; + + /* Prefetch the next iteration */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* + * Prefetch packet data. We expect to overwrite + * the inbound L2 header with an ip header and a + * gre header. Might want to prefetch the last line + * of rewrite space as well; need profile data + */ + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* Pick up the next two buffer indices */ + bi0 = from[0]; + bi1 = from[1]; + + /* Speculatively enqueue them where we sent the last buffer */ + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index [VLIB_TX]; + + /* get h/w intfcs */ + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + + /* hw_instance = tunnel pool index */ + t0 = pool_elt_at_index (gm->gre_tunnels, hi0->hw_instance); + t1 = pool_elt_at_index (gm->gre_tunnels, hi1->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + vlib_buffer_advance (b1, -(word)vec_len(t1->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + dst1 = vlib_buffer_get_current (b1); + + memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + memcpy (dst1, t1->rewrite_data, vec_len(t1->rewrite_data)); + + /* Fix TX fib indices */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->outer_fib_index; + vnet_buffer(b1)->sw_if_index [VLIB_TX] = t1->outer_fib_index; + + /* mpls-post-rewrite takes it from here... 
*/ + next0 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + next1 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = t0 - gm->gre_tunnels; + tr->length = b0->current_length; + tr->src.as_u32 = t0->tunnel_src.as_u32; + tr->dst.as_u32 = t0->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t0->encap_index; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->tunnel_id = t1 - gm->gre_tunnels; + tr->length = b1->current_length; + tr->src.as_u32 = t1->tunnel_src.as_u32; + tr->dst.as_u32 = t1->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t1->encap_index; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0, next0; + mpls_gre_tunnel_t * t0; + u32 sw_if_index0; + vnet_hw_interface_t * hi0; + u8 * dst0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + t0 = pool_elt_at_index (gm->gre_tunnels, hi0->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + + memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + + /* Fix the TX fib index */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->outer_fib_index; + + /* mpls-post-rewrite takes it from here... 
*/ + next0 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = t0 - gm->gre_tunnels; + tr->length = b0->current_length; + tr->src.as_u32 = t0->tunnel_src.as_u32; + tr->dst.as_u32 = t0->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t0->encap_index; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, gre_input_node.index, + GRE_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} + +static u8 * format_mpls_gre_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "mpls-gre%d", dev_instance); +} + +static u8 * format_mpls_gre_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + s = format (s, "MPLS-GRE tunnel: id %d\n", dev_instance); + return s; +} + +VNET_DEVICE_CLASS (mpls_gre_device_class) = { + .name = "MPLS-GRE tunnel device", + .format_device_name = format_mpls_gre_tunnel_name, + .format_device = format_mpls_gre_device, + .format_tx_trace = format_mpls_gre_tx_trace, + .tx_function = mpls_gre_interface_tx, + .no_flatten_output_chains = 1, +#ifdef SOON + .clear counter = 0; + .admin_up_down_function = 0; +#endif +}; + +VNET_HW_INTERFACE_CLASS (mpls_gre_hw_interface_class) = { + .name = "MPLS-GRE", + .format_header = format_mpls_gre_header_with_length, +#if 0 + .unformat_header = unformat_mpls_gre_header, +#endif + .set_rewrite = mpls_gre_set_rewrite, +}; + + +static uword mpls_eth_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + /* + * Conundrum: packets from tun/tap destined for the tunnel + * actually have this rewrite applied. Transit packets do not. + * To make the two cases equivalent, don't generate a + * rewrite here, build the entire header in the fast path. + */ + return 0; +} + +/* manually added to the interface output node */ +#define MPLS_ETH_OUTPUT_NEXT_OUTPUT 1 + +static uword +mpls_eth_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + mpls_main_t * gm = &mpls_main; + vnet_main_t * vnm = gm->vnet_main; + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * As long as we have enough pkts left to process two pkts + * and prefetch two pkts... 
+ */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, next0, bi1, next1; + mpls_eth_tunnel_t * t0, * t1; + u32 sw_if_index0, sw_if_index1; + vnet_hw_interface_t * hi0, * hi1; + u8 * dst0, * dst1; + + /* Prefetch the next iteration */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* + * Prefetch packet data. We expect to overwrite + * the inbound L2 header with an ip header and a + * gre header. Might want to prefetch the last line + * of rewrite space as well; need profile data + */ + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* Pick up the next two buffer indices */ + bi0 = from[0]; + bi1 = from[1]; + + /* Speculatively enqueue them where we sent the last buffer */ + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index [VLIB_TX]; + + /* get h/w intfcs */ + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + + /* hw_instance = tunnel pool index */ + t0 = pool_elt_at_index (gm->eth_tunnels, hi0->hw_instance); + t1 = pool_elt_at_index (gm->eth_tunnels, hi1->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + vlib_buffer_advance (b1, -(word)vec_len(t1->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + dst1 = vlib_buffer_get_current (b1); + + memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + memcpy (dst1, t1->rewrite_data, vec_len(t1->rewrite_data)); + + /* Fix TX fib indices */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->tx_sw_if_index; + vnet_buffer(b1)->sw_if_index [VLIB_TX] = t1->tx_sw_if_index; + + /* mpls-post-rewrite takes it from here... 
*/ + next0 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + next1 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t0 - gm->eth_tunnels; + tr->tx_sw_if_index = t0->tx_sw_if_index; + tr->mpls_encap_index = t0->encap_index; + tr->length = b0->current_length; + hi0 = vnet_get_sup_hw_interface (vnm, t0->tx_sw_if_index); + memcpy (tr->dst, hi0->hw_address, sizeof (tr->dst)); + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t1 - gm->eth_tunnels; + tr->tx_sw_if_index = t1->tx_sw_if_index; + tr->mpls_encap_index = t1->encap_index; + tr->length = b0->current_length; + hi1 = vnet_get_sup_hw_interface (vnm, t1->tx_sw_if_index); + memcpy (tr->dst, hi1->hw_address, sizeof (tr->dst)); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0, next0; + mpls_eth_tunnel_t * t0; + u32 sw_if_index0; + vnet_hw_interface_t * hi0; + u8 * dst0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + t0 = pool_elt_at_index (gm->eth_tunnels, hi0->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + + memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + + /* Fix the TX interface */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->tx_sw_if_index; + + /* Send the packet */ + next0 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t0 - gm->eth_tunnels; + tr->tx_sw_if_index = t0->tx_sw_if_index; + tr->mpls_encap_index = t0->encap_index; + tr->length = b0->current_length; + hi0 = vnet_get_sup_hw_interface (vnm, t0->tx_sw_if_index); + memcpy (tr->dst, hi0->hw_address, sizeof (tr->dst)); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} + +static u8 * format_mpls_eth_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "mpls-eth%d", dev_instance); +} + +static u8 * format_mpls_eth_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + s = format (s, "MPLS-ETH tunnel: id %d\n", dev_instance); + return s; +} + +VNET_DEVICE_CLASS (mpls_eth_device_class) = { + .name = "MPLS-ETH tunnel device", + .format_device_name = format_mpls_eth_tunnel_name, + .format_device = format_mpls_eth_device, + .format_tx_trace = format_mpls_eth_tx_trace, + .tx_function = mpls_eth_interface_tx, + .no_flatten_output_chains = 1, +#ifdef SOON + .clear counter = 0; + .admin_up_down_function = 0; +#endif +}; + + +VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = 
{ + .name = "MPLS-ETH", + .format_header = format_mpls_eth_header_with_length, +#if 0 + .unformat_header = unformat_mpls_eth_header, +#endif + .set_rewrite = mpls_eth_set_rewrite, +}; + +#define foreach_mpls_post_rewrite_next \ + _ (IP4_LOOKUP, "ip4-lookup") + +typedef enum { +#define _(s,n) MPLS_POST_REWRITE_NEXT_##s, + foreach_mpls_post_rewrite_next +#undef _ + MPLS_POST_REWRITE_N_NEXT, +} mpls_post_rewrite_next_t; + + +static uword +mpls_post_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + u16 old_l0 = 0, old_l1 = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ip4_header_t * ip0, * ip1; + u32 next0 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; + u32 next1 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; + u16 new_l0, new_l1; + ip_csum_t sum0, sum1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] */ + + /* set the GRE (outer) ip packet length, fix the bloody checksum */ + sum0 = ip0->checksum; + sum1 = ip1->checksum; + + /* old_l0, old_l1 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip1->checksum = ip_csum_fold (sum1); + ip0->length = new_l0; + ip1->length = new_l1; + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + ip4_header_t * ip0; + u32 next0 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; + u16 new_l0; + ip_csum_t sum0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] */ + + /* set the GRE (outer) ip packet length, fix the bloody checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + 
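/* Either the input frame is exhausted or the next frame is full: flush whatever has been enqueued so far before looping. */ + 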
vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_ENCAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (mpls_post_rewrite_node) = { + .function = mpls_post_rewrite, + .name = "mpls-post-rewrite", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = 0, + + .n_next_nodes = MPLS_POST_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_POST_REWRITE_NEXT_##s] = n, + foreach_mpls_post_rewrite_next +#undef _ + }, +}; + +static u8 * mpls_gre_rewrite (mpls_main_t *mm, mpls_gre_tunnel_t * t) +{ + ip4_header_t * ip0; + ip4_gre_and_mpls_header_t * h0; + u8 * rewrite_data = 0; + mpls_encap_t * e; + mpls_unicast_header_t *lp0; + int i; + + /* look up the encap label stack using the RX FIB */ + e = mpls_encap_by_fib_and_dest (mm, t->inner_fib_index, t->tunnel_dst.as_u32); + + if (e == 0) + { + clib_warning ("no label for inner fib index %d, dst %U", + t->inner_fib_index, format_ip4_address, + &t->tunnel_dst); + return 0; + } + + vec_validate (rewrite_data, sizeof (*h0) + + sizeof (mpls_unicast_header_t) * vec_len(e->labels) -1); + memset (rewrite_data, 0, sizeof (*h0)); + + h0 = (ip4_gre_and_mpls_header_t *) rewrite_data; + /* Copy the encap label stack */ + lp0 = h0->labels; + for (i = 0; i < vec_len(e->labels); i++) + lp0[i] = e->labels[i]; + ip0 = &h0->ip4; + h0->gre.protocol = clib_host_to_net_u16(GRE_PROTOCOL_mpls_unicast); + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_GRE; + /* $$$ fixup ip4 header length and checksum after-the-fact */ + ip0->src_address.as_u32 = t->tunnel_src.as_u32; + ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + + return (rewrite_data); +} + +int vnet_mpls_gre_add_del_tunnel (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, u32 outer_fib_id, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t zero; + mpls_gre_tunnel_t *tp; + int need_route_add_del = 1; + u32 inner_fib_index = 0; + u32 outer_fib_index = 0; + ip_adjacency_t adj; + u32 adj_index; + u8 * rewrite_data; + int found_tunnel = 0; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + u32 dummy; + + zero.as_u32 = 0; + + /* No questions, no answers */ + if (tunnel_sw_if_index == 0) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + inner_fib_index = p[0]; + } + + if (outer_fib_id != 0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, outer_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + outer_fib_index = p[0]; + } + + /* suppress duplicate mpls interface generation. 
*/ + pool_foreach (tp, mm->gre_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_src, src, sizeof (*src)) + && !memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index) + { + ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); + uword * hash = fib->adj_index_by_dst_address[mask_width]; + uword key = intfc->as_u32 & im->fib_masks[mask_width]; + uword *p = hash_get (hash, key); + + found_tunnel = 1; + + if (is_add) + { + /* A dup, and the route is in the fib. Done */ + if (p || l2_only) + return 1; + else + { + /* Reinstall the route (and other stuff) */ + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, + dst->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + goto reinstall_it; + } + } + else + { + /* Delete, the route is already gone? */ + if (!p) + need_route_add_del = 0; + goto add_del_route; + } + + } + })); + + /* Delete, and we can't find the tunnel */ + if (is_add == 0 && found_tunnel == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, dst->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + pool_get(mm->gre_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + + if (vec_len (mm->free_gre_sw_if_indices) > 0) + { + hw_if_index = + mm->free_gre_sw_if_indices[vec_len(mm->free_gre_sw_if_indices)-1]; + _vec_len (mm->free_gre_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->gre_tunnels; + hi->hw_instance = tp - mm->gre_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_gre_device_class.index, tp - mm->gre_tunnels, + mpls_gre_hw_interface_class.index, + tp - mm->gre_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... 
to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "mpls-post-rewrite", MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + + ASSERT (slot == MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + tp->hw_if_index = hw_if_index; + + reinstall_it: + tp->tunnel_src.as_u32 = src->as_u32; + tp->tunnel_dst.as_u32 = dst->as_u32; + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->outer_fib_index = outer_fib_index; + tp->encap_index = e - mm->encaps; + tp->l2_only = l2_only; + + /* Create the adjacency and add to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + rewrite_data = mpls_gre_rewrite (mm, tp); + if (rewrite_data == 0) + { + if (*tunnel_sw_if_index != ~0) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + } + pool_put (mm->gre_tunnels, tp); + return VNET_API_ERROR_NO_SUCH_LABEL; + } + + /* Save a copy of the rewrite data for L2 x-connect */ + vec_free (tp->rewrite_data); + + tp->rewrite_data = rewrite_data; + + vnet_rewrite_for_tunnel + (vnm, + outer_fib_index /* tx_sw_if_index, aka outer fib ID */, + ip4_rewrite_node.index, + mpls_post_rewrite_node.index, + &adj.rewrite_header, + rewrite_data, vec_len(rewrite_data)); + + if (!l2_only) + ip_add_adjacency (lm, &adj, 1 /* one adj */, + &adj_index); + + add_del_route: + + if (need_route_add_del && !l2_only) + { + if (is_add) + ip4_add_del_route_next_hop (im, + IP4_ROUTE_FLAG_ADD, + &tp->intfc_address, + tp->mask_width, + &zero /* no next hop */, + (u32)~0 /* next_hop_sw_if_index */, + 1 /* weight */, + adj_index, + tp->inner_fib_index); + else + { + ip4_add_del_route_args_t a; + memset (&a, 0, sizeof (a)); + + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = tp->inner_fib_index; + a.dst_address = tp->intfc_address; + a.dst_address_length = tp->mask_width; + a.adj_index = ~0; + + ip4_add_del_route (im, &a); + ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + } + } + + if (is_add == 0 && found_tunnel) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + pool_put (mm->gre_tunnels, tp); + } + + return 0; +} + +/* + * Remove all mpls tunnels in the specified fib + */ +int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id) +{ + ip4_main_t * im = &ip4_main; + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = mm->vnet_main; + mpls_gre_tunnel_t *tp; + u32 fib_index = 0; + uword * p; + u32 * tunnels_to_delete = 0; + vnet_hw_interface_t * hi; + ip4_fib_t * fib; + int i; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! 
p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + fib_index = p[0]; + + pool_foreach (tp, mm->gre_tunnels, + ({ + if (tp->inner_fib_index == fib_index) + vec_add1 (tunnels_to_delete, tp - mm->gre_tunnels); + })); + + fib = vec_elt_at_index (im->fibs, fib_index); + + for (i = 0; i < vec_len(tunnels_to_delete); i++) { + tp = pool_elt_at_index (mm->gre_tunnels, tunnels_to_delete[i]); + uword * hash = fib->adj_index_by_dst_address[tp->mask_width]; + uword key = tp->intfc_address.as_u32 & im->fib_masks[tp->mask_width]; + uword *p = hash_get (hash, key); + ip4_add_del_route_args_t a; + + /* Delete, the route if not already gone */ + if (p && !tp->l2_only) + { + memset (&a, 0, sizeof (a)); + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = tp->inner_fib_index; + a.dst_address = tp->intfc_address; + a.dst_address_length = tp->mask_width; + a.adj_index = ~0; + ip4_add_del_route (im, &a); + ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + } + + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + pool_put (mm->gre_tunnels, tp); + } + + vec_free(tunnels_to_delete); + + return (0); +} + +static clib_error_t * +create_mpls_gre_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t src, dst, intfc; + int src_set = 0, dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + u32 outer_fib_id = 0; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tunnel_intfc_sw_if_index = ~0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "src %U", + unformat_ip4_address, &src)) + src_set = 1; + else if (unformat (line_input, "dst %U", + unformat_ip4_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "intfc %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + intfc_set = 1; + else if (unformat (line_input, "inner-fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (!src_set) + return clib_error_return (0, "missing: src <ip-address>"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ip-address>"); + + if (!intfc_set) + return clib_error_return (0, "missing: intfc <ip-address>/<mask-width>"); + + + rv = vnet_mpls_gre_add_del_tunnel (&src, &dst, &intfc, mask_width, + inner_fib_id, outer_fib_id, + &tunnel_intfc_sw_if_index, + l2_only, !is_del); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "inner fib ID %d doesn't exist\n", + inner_fib_id); + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "outer fib ID %d doesn't exist\n", + outer_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. 
+ */ + break; + + default: + return clib_error_return (0, "vnet_mpls_gre_add_del_tunnel returned %d", + rv); + } + return 0; +} + +VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { + .path = "create mpls gre tunnel", + .short_help = + "create mpls gre tunnel [del] src <addr> dst <addr> intfc <addr>/<mw>", + .function = create_mpls_gre_tunnel_command_fn, +}; + +u8 * format_mpls_encap_index (u8 * s, va_list * args) +{ + mpls_main_t * mm = va_arg (*args, mpls_main_t *); + u32 entry_index = va_arg (*args, u32); + mpls_encap_t * e; + int i; + + e = pool_elt_at_index (mm->encaps, entry_index); + + for (i = 0; i < vec_len (e->labels); i++) + s = format + (s, "%d ", vnet_mpls_uc_get_label(clib_net_to_host_u32 + (e->labels[i].label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_tunnel (u8 * s, va_list * args) +{ + mpls_gre_tunnel_t * t = va_arg (*args, mpls_gre_tunnel_t *); + mpls_main_t * mm = &mpls_main; + + if (t->l2_only == 0) + { + s = format (s, "[%d]: src %U, dst %U, adj %U/%d, labels %U\n", + t - mm->gre_tunnels, + format_ip4_address, &t->tunnel_src, + format_ip4_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + t->mask_width, + format_mpls_encap_index, mm, t->encap_index); + + s = format (s, " inner fib index %d, outer fib index %d", + t->inner_fib_index, t->outer_fib_index); + } + else + { + s = format (s, "[%d]: src %U, dst %U, key %U, labels %U\n", + t - mm->gre_tunnels, + format_ip4_address, &t->tunnel_src, + format_ip4_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + format_mpls_encap_index, mm, t->encap_index); + + s = format (s, " l2 interface %d, outer fib index %d", + t->hw_if_index, t->outer_fib_index); + } + + return s; +} + +u8 * format_mpls_ethernet_tunnel (u8 * s, va_list * args) +{ + mpls_eth_tunnel_t * t = va_arg (*args, mpls_eth_tunnel_t *); + mpls_main_t * mm = &mpls_main; + + s = format (s, "[%d]: dst %U, adj %U/%d, labels %U\n", + t - mm->eth_tunnels, + format_ethernet_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + t->mask_width, + format_mpls_encap_index, mm, t->encap_index); + + + s = format (s, " tx on %U, rx fib index %d", + format_vnet_sw_if_index_name, mm->vnet_main, t->tx_sw_if_index, + t->inner_fib_index); + + return s; +} + +static clib_error_t * +show_mpls_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_main_t * mm = &mpls_main; + mpls_gre_tunnel_t * gt; + mpls_eth_tunnel_t * et; + + if (pool_elts (mm->gre_tunnels)) + { + vlib_cli_output (vm, "MPLS-GRE tunnels"); + pool_foreach (gt, mm->gre_tunnels, + ({ + vlib_cli_output (vm, "%U", format_mpls_gre_tunnel, gt); + })); + } + else + vlib_cli_output (vm, "No MPLS-GRE tunnels"); + + if (pool_elts (mm->eth_tunnels)) + { + vlib_cli_output (vm, "MPLS-Ethernet tunnels"); + pool_foreach (et, mm->eth_tunnels, + ({ + vlib_cli_output (vm, "%U", format_mpls_ethernet_tunnel, et); + })); + } + else + vlib_cli_output (vm, "No MPLS-Ethernet tunnels"); + + return 0; +} + +VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = { + .path = "show mpls tunnel", + .short_help = "show mpls tunnel", + .function = show_mpls_tunnel_command_fn, +}; + +/* force inclusion from application's main.c */ +clib_error_t *mpls_interface_init (vlib_main_t *vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, mpls_policy_encap_init))) + return error; + + return 0; +} +VLIB_INIT_FUNCTION(mpls_interface_init); + + +static u8 * mpls_ethernet_rewrite (mpls_main_t *mm, mpls_eth_tunnel_t * t) +{ + u8 * 
rewrite_data = 0; + mpls_encap_t * e; + mpls_unicast_header_t *lp0; + int i; + + /* look up the encap label stack using the RX FIB and adjacency address*/ + e = mpls_encap_by_fib_and_dest (mm, t->inner_fib_index, + t->intfc_address.as_u32); + + if (e == 0) + { + clib_warning ("no label for inner fib index %d, dst %U", + t->inner_fib_index, format_ip4_address, + &t->intfc_address); + return 0; + } + + vec_validate (rewrite_data, + sizeof (mpls_unicast_header_t) * vec_len(e->labels) -1); + + /* Copy the encap label stack */ + lp0 = (mpls_unicast_header_t *) rewrite_data; + + for (i = 0; i < vec_len(e->labels); i++) + lp0[i] = e->labels[i]; + + return (rewrite_data); +} + +int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t zero; + mpls_eth_tunnel_t *tp; + int need_route_add_del = 1; + u32 inner_fib_index = 0; + ip_adjacency_t adj; + u32 adj_index; + u8 * rewrite_data; + int found_tunnel = 0; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + u32 dummy; + + zero.as_u32 = 0; + + if (tunnel_sw_if_index == 0) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + inner_fib_index = p[0]; + } + + /* suppress duplicate mpls interface generation. */ + pool_foreach (tp, mm->eth_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index) + { + ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); + uword * hash = fib->adj_index_by_dst_address[mask_width]; + uword key = intfc->as_u32 & im->fib_masks[mask_width]; + uword *p = hash_get (hash, key); + + found_tunnel = 1; + + if (is_add) + { + if (p || l2_only) + return 1; + else + { + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, + intfc->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + goto reinstall_it; + } + } + else + { + /* Delete, the route is already gone? */ + if (!p) + need_route_add_del = 0; + goto add_del_route; + } + + } + })); + + /* Delete, and we can't find the tunnel */ + if (is_add == 0 && found_tunnel == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, intfc->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + pool_get(mm->eth_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + + if (vec_len (mm->free_eth_sw_if_indices) > 0) + { + hw_if_index = + mm->free_eth_sw_if_indices[vec_len(mm->free_eth_sw_if_indices)-1]; + _vec_len (mm->free_eth_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->eth_tunnels; + hi->hw_instance = tp - mm->eth_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_eth_device_class.index, tp - mm->eth_tunnels, + mpls_eth_hw_interface_class.index, + tp - mm->eth_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... 
to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "interface-output", MPLS_ETH_OUTPUT_NEXT_OUTPUT); + + ASSERT (slot == MPLS_ETH_OUTPUT_NEXT_OUTPUT); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + tp->hw_if_index = hw_if_index; + + reinstall_it: + memcpy(tp->tunnel_dst, dst, sizeof (tp->tunnel_dst)); + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->encap_index = e - mm->encaps; + tp->tx_sw_if_index = tx_sw_if_index; + tp->l2_only = l2_only; + + /* Create the adjacency and add to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + rewrite_data = mpls_ethernet_rewrite (mm, tp); + if (rewrite_data == 0) + { + if (*tunnel_sw_if_index != ~0) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + } + + pool_put (mm->eth_tunnels, tp); + return VNET_API_ERROR_NO_SUCH_LABEL; + } + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_MPLS_UNICAST, + tx_sw_if_index, + ip4_rewrite_node.index, + tp->tunnel_dst, + &adj.rewrite_header, + sizeof (adj.rewrite_data)); + + /* + * Prepend the (0,1,2) VLAN tag ethernet header + * we just built to the mpls header stack + */ + vec_insert (rewrite_data, adj.rewrite_header.data_bytes, 0); + memcpy(rewrite_data, + vnet_rewrite_get_data_internal(&adj.rewrite_header, + sizeof (adj.rewrite_data)), + adj.rewrite_header.data_bytes); + + vnet_rewrite_set_data_internal (&adj.rewrite_header, + sizeof(adj.rewrite_data), + rewrite_data, + vec_len(rewrite_data)); + + vec_free (tp->rewrite_data); + + tp->rewrite_data = rewrite_data; + + if (!l2_only) + ip_add_adjacency (lm, &adj, 1 /* one adj */, + &adj_index); + + add_del_route: + + if (need_route_add_del && !l2_only) + { + if (is_add) + ip4_add_del_route_next_hop (im, + IP4_ROUTE_FLAG_ADD, + &tp->intfc_address, + tp->mask_width, + &zero /* no next hop */, + (u32)~0 /* next_hop_sw_if_index */, + 1 /* weight */, + adj_index, + tp->inner_fib_index); + else + { + ip4_add_del_route_args_t a; + memset (&a, 0, sizeof (a)); + + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = tp->inner_fib_index; + a.dst_address = tp->intfc_address; + a.dst_address_length = tp->mask_width; + a.adj_index = ~0; + + ip4_add_del_route (im, &a); + ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + } + } + if (is_add == 0 && found_tunnel) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + pool_put (mm->eth_tunnels, tp); + } + + return 0; +} + +static clib_error_t * +create_mpls_ethernet_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t intfc; + int adj_set = 0; + u8 dst[6]; + int dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tx_sw_if_index; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "dst %U", + unformat_ethernet_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "adj %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + adj_set = 1; + else if (unformat (line_input, "tx-intfc %U", + unformat_vnet_sw_interface, vnm, &tx_sw_if_index)) + intfc_set = 1; + else if (unformat (line_input, "fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (!intfc_set) + return clib_error_return (0, "missing tx-intfc"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ethernet-address>"); + + if (!adj_set) + return clib_error_return (0, "missing: intfc <ip-address>/<mask-width>"); + + + rv = vnet_mpls_ethernet_add_del_tunnel (dst, &intfc, mask_width, + inner_fib_id, tx_sw_if_index, + 0 /* tunnel sw_if_index */, + l2_only, !is_del); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "rx fib ID %d doesn't exist\n", + inner_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. + */ + return clib_error_return (0, "no label for %U in fib %d", + format_ip4_address, &intfc, inner_fib_id); + break; + + default: + break; + } + return 0; +} + + +VLIB_CLI_COMMAND (create_mpls_ethernet_tunnel_command, static) = { + .path = "create mpls ethernet tunnel", + .short_help = + "create mpls ethernet tunnel [del] dst <mac-addr> intfc <addr>/<mw>", + .function = create_mpls_ethernet_tunnel_command_fn, +}; + + +int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm, + mpls_encap_t * e, + u32 policy_tunnel_index) +{ + mpls_eth_tunnel_t * t; + ip_adjacency_t adj; + u8 * rewrite_data = 0; + u8 * label_start; + mpls_unicast_header_t *lp; + int i; + + if (pool_is_free_index (mm->eth_tunnels, policy_tunnel_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + t = pool_elt_at_index (mm->eth_tunnels, policy_tunnel_index); + + memset (&adj, 0, sizeof (adj)); + + /* Build L2 encap */ + vnet_rewrite_for_sw_interface + (mm->vnet_main, + VNET_L3_PACKET_TYPE_MPLS_UNICAST, + t->tx_sw_if_index, + mpls_policy_encap_node.index, + t->tunnel_dst, + &adj.rewrite_header, + sizeof (adj.rewrite_data)); + + vec_validate (rewrite_data, adj.rewrite_header.data_bytes -1); + + memcpy(rewrite_data, + vnet_rewrite_get_data_internal(&adj.rewrite_header, + sizeof (adj.rewrite_data)), + adj.rewrite_header.data_bytes); + + /* Append the label stack */ + + vec_add2 (rewrite_data, label_start, vec_len(e->labels) * sizeof (u32)); + + lp = (mpls_unicast_header_t *) label_start; + + for (i = 0; i < vec_len(e->labels); i++) + lp[i] = e->labels[i]; + + /* Remember the rewrite data */ + e->rewrite = rewrite_data; + e->output_next_index = adj.rewrite_header.next_index; + + return 0; +} + +int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u32 classify_table_index, + u32 * new_tunnel_index, + u8 l2_only, + u8 is_add) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + mpls_main_t * mm = 
&mpls_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t zero; + mpls_eth_tunnel_t *tp; + int need_route_add_del = 1; + u32 inner_fib_index = 0; + ip_adjacency_t adj; + u32 adj_index; + int found_tunnel = 0; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + u32 dummy; + + zero.as_u32 = 0; + + if (tunnel_sw_if_index == 0) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + inner_fib_index = p[0]; + } + + /* suppress duplicate mpls interface generation. */ + pool_foreach (tp, mm->eth_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index) + { + ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); + uword * hash = fib->adj_index_by_dst_address[mask_width]; + uword key = intfc->as_u32 & im->fib_masks[mask_width]; + uword *p = hash_get (hash, key); + + found_tunnel = 1; + + if (is_add) + { + if (p || l2_only) + return 1; + else + { + goto reinstall_it; + } + } + else + { + /* Delete, the route is already gone? */ + if (!p) + need_route_add_del = 0; + goto add_del_route; + } + + } + })); + + /* Delete, and we can't find the tunnel */ + if (is_add == 0 && found_tunnel == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + pool_get(mm->eth_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + + if (vec_len (mm->free_eth_sw_if_indices) > 0) + { + hw_if_index = + mm->free_eth_sw_if_indices[vec_len(mm->free_eth_sw_if_indices)-1]; + _vec_len (mm->free_eth_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->eth_tunnels; + hi->hw_instance = tp - mm->eth_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_eth_device_class.index, tp - mm->eth_tunnels, + mpls_eth_hw_interface_class.index, + tp - mm->eth_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... 
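Retired tunnel interfaces are not deleted above; their hw_if_index values are pushed onto mm->free_eth_sw_if_indices and popped off the tail before vnet_register_interface is called for a new tunnel. A standalone sketch of that stack-style free list (illustrative only, not the vppinfra vector implementation):

#include <stdint.h>
#include <stdio.h>

#define MAX_FREE 64

typedef struct {
  uint32_t free_indices[MAX_FREE];
  uint32_t n_free;
  uint32_t next_new;     /* next never-used index */
} if_allocator_t;

static uint32_t
alloc_if_index (if_allocator_t *a)
{
  if (a->n_free > 0)
    return a->free_indices[--a->n_free];   /* reuse the most recently freed index */
  return a->next_new++;                    /* otherwise mint a new one */
}

static void
free_if_index (if_allocator_t *a, uint32_t index)
{
  if (a->n_free < MAX_FREE)
    a->free_indices[a->n_free++] = index;
}

int main (void)
{
  if_allocator_t a = { .n_free = 0, .next_new = 0 };
  uint32_t i0 = alloc_if_index (&a);   /* 0 */
  free_if_index (&a, i0);
  uint32_t i1 = alloc_if_index (&a);   /* reuses 0 */
  printf ("%u %u\n", i0, i1);
  return 0;
}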
to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "interface-output", MPLS_ETH_OUTPUT_NEXT_OUTPUT); + + ASSERT (slot == MPLS_ETH_OUTPUT_NEXT_OUTPUT); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + tp->hw_if_index = hw_if_index; + + reinstall_it: + memcpy(tp->tunnel_dst, dst, sizeof (tp->tunnel_dst)); + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->encap_index = e - mm->encaps; + tp->tx_sw_if_index = tx_sw_if_index; + tp->l2_only = l2_only; + + if (new_tunnel_index) + *new_tunnel_index = tp - mm->eth_tunnels; + + /* Create the classify adjacency and add to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; + adj.classify_table_index = classify_table_index; + + if (!l2_only) + ip_add_adjacency (lm, &adj, 1 /* one adj */, + &adj_index); + + add_del_route: + + if (need_route_add_del && !l2_only) + { + if (is_add) + ip4_add_del_route_next_hop (im, + IP4_ROUTE_FLAG_ADD, + &tp->intfc_address, + tp->mask_width, + &zero /* no next hop */, + (u32)~0 /* next_hop_sw_if_index */, + 1 /* weight */, + adj_index, + tp->inner_fib_index); + else + { + ip4_add_del_route_args_t a; + memset (&a, 0, sizeof (a)); + + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = tp->inner_fib_index; + a.dst_address = tp->intfc_address; + a.dst_address_length = tp->mask_width; + a.adj_index = ~0; + + ip4_add_del_route (im, &a); + ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + } + } + if (is_add == 0 && found_tunnel) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + pool_put (mm->eth_tunnels, tp); + } + + return 0; +} + +static clib_error_t * +create_mpls_ethernet_policy_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t intfc; + int adj_set = 0; + u8 dst[6]; + int dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + u32 classify_table_index = (u32)~0; + u32 new_tunnel_index; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tx_sw_if_index; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "dst %U", + unformat_ethernet_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "adj %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + adj_set = 1; + else if (unformat (line_input, "tx-intfc %U", + unformat_vnet_sw_interface, vnm, &tx_sw_if_index)) + intfc_set = 1; + else if (unformat (line_input, "classify-table-index %d", + &classify_table_index)) + ; + else if (unformat (line_input, "fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (classify_table_index == ~0) + return clib_error_return (0, "missing classify_table_index"); + + if (!intfc_set) + return clib_error_return (0, "missing tx-intfc"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ethernet-address>"); + + if (!adj_set) + return clib_error_return (0, "missing: intfc <ip-address>/<mask-width>"); + + + rv = vnet_mpls_ethernet_add_del_policy_tunnel (dst, &intfc, mask_width, + inner_fib_id, tx_sw_if_index, + 0 /* tunnel sw_if_index */, + classify_table_index, + &new_tunnel_index, + l2_only, !is_del); + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "rx fib ID %d doesn't exist\n", + inner_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. + */ + return clib_error_return (0, "no label for %U in fib %d", + format_ip4_address, &intfc, inner_fib_id); + break; + + default: + break; + } + + if (!is_del) + vlib_cli_output (vm, "tunnel index %d", new_tunnel_index); + + return 0; +} + +VLIB_CLI_COMMAND (create_mpls_ethernet_policy_tunnel_command, static) = { + .path = "create mpls ethernet policy tunnel", + .short_help = + "create mpls ethernet policy tunnel [del] dst <mac-addr> intfc <addr>/<mw>\n" + " classify-table-index <nn>", + .function = create_mpls_ethernet_policy_tunnel_command_fn, +}; diff --git a/vnet/vnet/mpls-gre/mpls.c b/vnet/vnet/mpls-gre/mpls.c new file mode 100644 index 00000000000..431a69b4ab0 --- /dev/null +++ b/vnet/vnet/mpls-gre/mpls.c @@ -0,0 +1,769 @@ +/* + * mpls.c: mpls + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
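A hypothetical policy-tunnel invocation matching the parse loop just above (addresses, interface name and table index invented); classify-table-index is the one argument that distinguishes it from the plain ethernet tunnel form and is mandatory:

  create mpls ethernet policy tunnel dst 00:01:02:03:04:05 adj 10.20.30.0/24 tx-intfc GigabitEthernet2/0/0 classify-table-index 1

On success the command prints the new tunnel index, which is the value the policy-tunnel <nn> option of mpls encap add refers to.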
+ */ + +#include <vnet/vnet.h> +#include <vnet/mpls-gre/mpls.h> + +mpls_main_t mpls_main; + +u8 * format_mpls_gre_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_gre_tx_trace_t * t = va_arg (*args, mpls_gre_tx_trace_t *); + mpls_main_t * mm = &mpls_main; + + if (t->lookup_miss) + s = format (s, "MPLS: lookup miss"); + else + { + s = format (s, "MPLS: tunnel %d labels %U len %d src %U dst %U", + t->tunnel_id, + format_mpls_encap_index, mm, t->mpls_encap_index, + clib_net_to_host_u16 (t->length), + format_ip4_address, &t->src.as_u8, + format_ip4_address, &t->dst.as_u8); + } + return s; +} + +u8 * format_mpls_eth_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_eth_tx_trace_t * t = va_arg (*args, mpls_eth_tx_trace_t *); + mpls_main_t * mm = &mpls_main; + + if (t->lookup_miss) + s = format (s, "MPLS: lookup miss"); + else + { + s = format (s, "MPLS: tunnel %d labels %U len %d tx_sw_index %d dst %U", + t->tunnel_id, + format_mpls_encap_index, mm, t->mpls_encap_index, + clib_net_to_host_u16 (t->length), + t->tx_sw_if_index, + format_ethernet_address, t->dst); + } + return s; +} + +u8 * format_mpls_eth_header_with_length (u8 * s, va_list * args) +{ + ethernet_header_t * h = va_arg (*args, ethernet_header_t *); + mpls_unicast_header_t * m = (mpls_unicast_header_t *)(h+1); + u32 max_header_bytes = va_arg (*args, u32); + uword header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "ethernet header truncated"); + + s = format + (s, "ETHERNET-MPLS label %d", + vnet_mpls_uc_get_label (clib_net_to_host_u32 (m->label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_header_with_length (u8 * s, va_list * args) +{ + gre_header_t * h = va_arg (*args, gre_header_t *); + mpls_unicast_header_t * m = (mpls_unicast_header_t *)(h+1); + u32 max_header_bytes = va_arg (*args, u32); + uword header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "gre header truncated"); + + s = format + (s, "GRE-MPLS label %d", + vnet_mpls_uc_get_label (clib_net_to_host_u32 (m->label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_header (u8 * s, va_list * args) +{ + gre_header_t * h = va_arg (*args, gre_header_t *); + return format (s, "%U", format_mpls_gre_header_with_length, h, 0); +} + +uword +unformat_mpls_gre_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + gre_header_t _g, * g = &_g; + mpls_unicast_header_t _h, * h = &_h; + u32 label, label_exp_s_ttl; + + if (! unformat (input, "MPLS %d", &label)) + return 0; + + g->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_mpls_unicast); + + label_exp_s_ttl = (label<<12) | (1<<8) /* s-bit */ | 0xFF; + h->label_exp_s_ttl = clib_host_to_net_u32 (label_exp_s_ttl); + + /* Add gre, mpls headers to result. 
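unformat_mpls_gre_header packs the parsed label into the single 32-bit label_exp_s_ttl word, always setting the bottom-of-stack bit and a TTL of 0xff. A quick standalone check of that packing for label 100 (plain C, mirroring the shifts used throughout this file):

#include <stdint.h>
#include <stdio.h>

int main (void)
{
  uint32_t label = 100;
  uint32_t label_exp_s_ttl = (label << 12) | (1 << 8) | 0xff;

  printf ("packed word: 0x%08x\n", label_exp_s_ttl);   /* 0x000641ff */
  printf ("label=%u exp=%u s=%u ttl=%u\n",
          label_exp_s_ttl >> 12,
          (label_exp_s_ttl >> 9) & 0x7,
          (label_exp_s_ttl >> 8) & 0x1,
          label_exp_s_ttl & 0xff);
  return 0;
}

The same >>12, >>9, >>8 and & 0xff extractions reappear as the vnet_mpls_uc_get_* helpers in packet.h further down.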
*/ + { + void * p; + u32 g_n_bytes = sizeof (g[0]); + u32 h_n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, g_n_bytes); + memcpy (p, g, g_n_bytes); + + vec_add2 (*result, p, h_n_bytes); + memcpy (p, h, h_n_bytes); + } + + return 1; +} + +uword +unformat_mpls_label_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 label; + + if (!unformat (input, "MPLS: label %d", &label)) + return 0; + + label = (label<<12) | (1<<8) /* s-bit set */ | 0xFF /* ttl */; + + *result = clib_host_to_net_u32 (label); + return 1; +} + +mpls_encap_t * +mpls_encap_by_fib_and_dest (mpls_main_t * mm, u32 rx_fib, u32 dst_address) +{ + uword * p; + mpls_encap_t * e; + u64 key; + + key = ((u64)rx_fib<<32) | ((u64) dst_address); + p = hash_get (mm->mpls_encap_by_fib_and_dest, key); + + if (!p) + return 0; + + e = pool_elt_at_index (mm->encaps, p[0]); + return e; +} + +int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id, + u32 *labels_host_byte_order, + u32 policy_tunnel_index, + int no_dst_hash, u32 * indexp, int is_add) +{ + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + mpls_encap_t * e; + u32 label_net_byte_order, label_host_byte_order; + u32 fib_index; + u64 key; + uword *p; + int i; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_index = p[0]; + + key = ((u64)fib_index<<32) | ((u64) dest->as_u32); + + if (is_add) + { + pool_get (mm->encaps, e); + memset (e, 0, sizeof (*e)); + + for (i = 0; i < vec_len (labels_host_byte_order); i++) + { + mpls_unicast_header_t h; + label_host_byte_order = labels_host_byte_order[i]; + + /* Reformat label into mpls_unicast_header_t */ + label_host_byte_order <<= 12; + if (i == vec_len(labels_host_byte_order) - 1) + label_host_byte_order |= 1<<8; /* S=1 */ + label_host_byte_order |= 0xff; /* TTL=FF */ + label_net_byte_order = clib_host_to_net_u32 (label_host_byte_order); + h.label_exp_s_ttl = label_net_byte_order; + vec_add1 (e->labels, h); + } + if (no_dst_hash == 0) + hash_set (mm->mpls_encap_by_fib_and_dest, key, e - mm->encaps); + if (indexp) + *indexp = e - mm->encaps; + if (policy_tunnel_index != ~0) + return vnet_mpls_policy_tunnel_add_rewrite (mm, e, policy_tunnel_index); + } + else + { + p = hash_get (mm->mpls_encap_by_fib_and_dest, key); + if (!p) + return VNET_API_ERROR_NO_SUCH_LABEL; + + e = pool_elt_at_index (mm->encaps, p[0]); + + vec_free (e->labels); + vec_free (e->rewrite); + pool_put(mm->encaps, e); + + if (no_dst_hash == 0) + hash_unset (mm->mpls_encap_by_fib_and_dest, key); + } + return 0; +} + +static clib_error_t * +mpls_add_encap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 fib_id; + u32 *labels = 0; + u32 this_label; + ip4_address_t dest; + u32 policy_tunnel_index = ~0; + int no_dst_hash = 0; + int rv; + int fib_set = 0; + int dest_set = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "fib %d", &fib_id)) + fib_set = 1; + else if (unformat (input, "dest %U", unformat_ip4_address, &dest)) + dest_set = 1; + else if (unformat (input, "no-dst-hash")) + no_dst_hash = 1; + else if (unformat (input, "label %d", &this_label)) + vec_add1 (labels, this_label); + else if (unformat (input, "policy-tunnel %d", &policy_tunnel_index)) + ; + else + break; + } + + if (fib_set == 0) + return clib_error_return (0, "fib-id missing"); + if (dest_set == 0) + return clib_error_return (0, "destination IP address missing"); + if (vec_len (labels) 
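vnet_mpls_add_del_encap walks the caller's label vector, gives every entry a TTL of 0xff and sets the S bit only on the final entry before converting to network byte order. A standalone sketch of that stack construction (host byte order kept for readability; the helper name is invented):

#include <stdint.h>
#include <stdio.h>

/* Build a label stack the way vnet_mpls_add_del_encap does: every entry
 * gets TTL 0xff, only the last entry gets the S (bottom-of-stack) bit. */
static void
build_label_stack (const uint32_t *labels, int n_labels, uint32_t *out)
{
  int i;
  for (i = 0; i < n_labels; i++)
    {
      uint32_t word = labels[i] << 12;
      if (i == n_labels - 1)
        word |= 1 << 8;          /* S = 1 on the innermost label */
      word |= 0xff;              /* TTL */
      out[i] = word;
    }
}

int main (void)
{
  uint32_t labels[2] = { 200, 100 };
  uint32_t stack[2];

  build_label_stack (labels, 2, stack);
  printf ("0x%08x 0x%08x\n", stack[0], stack[1]);   /* 0x000c80ff 0x000641ff */
  return 0;
}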
== 0) + return clib_error_return (0, "label stack missing"); + + rv = vnet_mpls_add_del_encap (&dest, fib_id, labels, + policy_tunnel_index, + no_dst_hash, 0 /* indexp */, + 1 /* is_add */); + vec_free (labels); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "fib id %d unknown", fib_id); + + default: + return clib_error_return (0, "vnet_mpls_add_del_encap returned %d", + rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (mpls_add_encap_command, static) = { + .path = "mpls encap add", + .short_help = + "mpls encap add label <label> ... fib <id> dest <ip4-address>", + .function = mpls_add_encap_command_fn, +}; + +u8 * format_mpls_unicast_header_host_byte_order (u8 * s, va_list * args) +{ + mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *); + u32 label = h->label_exp_s_ttl; + + s = format (s, "label %d exp %d, s %d, ttl %d", + vnet_mpls_uc_get_label (label), + vnet_mpls_uc_get_exp (label), + vnet_mpls_uc_get_s (label), + vnet_mpls_uc_get_ttl (label)); + return s; +} + +u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args) +{ + mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *); + mpls_unicast_header_t h_host; + + h_host.label_exp_s_ttl = clib_net_to_host_u32 (h->label_exp_s_ttl); + + return format (s, "%U", format_mpls_unicast_header_host_byte_order, + &h_host); +} + +static clib_error_t * +mpls_del_encap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 fib_id; + ip4_address_t dest; + int rv; + + if (unformat (input, "fib %d dest %U", &fib_id, + unformat_ip4_address, &dest)) + { + rv = vnet_mpls_add_del_encap (&dest, fib_id, 0 /* labels */, + ~0 /* policy_tunnel_index */, + 0 /* no_dst_hash */, + 0 /* indexp */, + 0 /* is_add */); + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "fib id %d unknown", fib_id); + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "dest %U not in fib %d", + format_ip4_address, &dest, fib_id); + default: + break; + } + return 0; + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (mpls_del_encap_command, static) = { + .path = "mpls encap delete", + .short_help = "mpls encap delete fib <id> dest <ip4-address>", + .function = mpls_del_encap_command_fn, +}; + +int vnet_mpls_add_del_decap (u32 rx_fib_id, + u32 tx_fib_id, + u32 label_host_byte_order, + int s_bit, int next_index, int is_add) +{ + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + mpls_decap_t * d; + u32 rx_fib_index, tx_fib_index_or_output_swif_index; + uword *p; + u64 key; + + p = hash_get (im->fib_index_by_table_id, rx_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + + rx_fib_index = p[0]; + + /* L3 decap => transform fib ID to fib index */ + if (next_index == MPLS_INPUT_NEXT_IP4_INPUT) + { + p = hash_get (im->fib_index_by_table_id, tx_fib_id); + if (! 
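Hypothetical debug CLI invocations for the two encap commands registered above (FIB id and destination invented for illustration):

  mpls encap add label 200 label 100 fib 0 dest 192.168.1.1
  mpls encap delete fib 0 dest 192.168.1.1

The add form accepts label repeatedly to build a multi-label stack, plus the optional no-dst-hash and policy-tunnel <nn> arguments handled in the parse loop.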
p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + + tx_fib_index_or_output_swif_index = p[0]; + } + else + { + /* L2 decap, tx_fib_id is actually the output sw_if_index */ + tx_fib_index_or_output_swif_index = tx_fib_id; + } + + key = ((u64)rx_fib_index<<32) | ((u64) (label_host_byte_order<<12)) + | ((u64) s_bit<<8); + + p = hash_get (mm->mpls_decap_by_rx_fib_and_label, key); + + /* If deleting, or replacing an old entry */ + if (is_add == 0 || p) + { + if (is_add == 0 && p == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + d = pool_elt_at_index (mm->decaps, p[0]); + hash_unset (mm->mpls_decap_by_rx_fib_and_label, key); + pool_put (mm->decaps, d); + /* Deleting, we're done... */ + if (is_add == 0) + return 0; + } + + /* add decap entry... */ + pool_get (mm->decaps, d); + memset (d, 0, sizeof (*d)); + d->tx_fib_index = tx_fib_index_or_output_swif_index; + d->next_index = next_index; + + hash_set (mm->mpls_decap_by_rx_fib_and_label, key, d - mm->decaps); + + return 0; +} + +uword +unformat_mpls_gre_input_next (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + int rv = 0; + + if (unformat (input, "lookup")) + { + *result = MPLS_INPUT_NEXT_IP4_INPUT; + rv = 1; + } + else if (unformat (input, "output")) + { + *result = MPLS_INPUT_NEXT_L2_OUTPUT; + rv = 1; + } + return rv; +} + +static clib_error_t * +mpls_add_decap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 rx_fib_id = 0; + u32 tx_fib_or_sw_if_index; + u32 label; + int s_bit = 1; + u32 next_index = 1; /* ip4_lookup, see node.c */ + int tx_fib_id_set = 0; + int label_set = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "fib %d", &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "sw_if_index %d", &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "rx-fib %d", &rx_fib_id)) + ; + else if (unformat (input, "label %d", &label)) + label_set = 1; + else if (unformat (input, "s-bit-clear")) + s_bit = 0; + else if (unformat (input, "next %U", unformat_mpls_gre_input_next, + &next_index)) + ; + else + break; + } + + if (tx_fib_id_set == 0) + return clib_error_return (0, "lookup FIB ID not set"); + if (label_set == 0) + return clib_error_return (0, "missing label"); + + rv = vnet_mpls_add_del_decap (rx_fib_id, tx_fib_or_sw_if_index, + label, s_bit, next_index, 1 /* is_add */); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "no such rx fib id %d", rx_fib_id); + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "no such tx fib / swif %d", + tx_fib_or_sw_if_index); + + default: + return clib_error_return (0, "vnet_mpls_add_del_decap returned %d", + rv); + } + return 0; +} + +VLIB_CLI_COMMAND (mpls_add_decap_command, static) = { + .path = "mpls decap add", + .short_help = + "mpls decap add fib <id> label <nn> [s-bit-clear] [next-index <nn>]", + .function = mpls_add_decap_command_fn, +}; + +static clib_error_t * +mpls_del_decap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 rx_fib_id = 0; + u32 tx_fib_id = 0; + u32 label; + int s_bit = 1; + int label_set = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "rx-fib %d", &rx_fib_id)) + ; + else 
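The decap table key built above packs the rx FIB index, the label and the s-bit into one u64, which is the same key mpls-gre-input recomputes per label in node.c. A standalone sketch of the packing (the helper name is invented):

#include <stdint.h>
#include <stdio.h>

/* Same layout as the hash key built in vnet_mpls_add_del_decap: rx FIB
 * index in the top 32 bits, label shifted past the exp/s/ttl fields,
 * s-bit at bit 8. */
static uint64_t
mpls_decap_key (uint32_t rx_fib_index, uint32_t label, int s_bit)
{
  return ((uint64_t) rx_fib_index << 32)
       | ((uint64_t) label << 12)
       | ((uint64_t) (s_bit ? 1 : 0) << 8);
}

int main (void)
{
  printf ("0x%016llx\n",
          (unsigned long long) mpls_decap_key (5, 100, 1));   /* 0x0000000500064100 */
  return 0;
}

For example, a hypothetical mpls decap add rx-fib 0 fib 0 label 100 would, assuming FIB 0 maps to index 0, end up hashed under key 0x0000000000064100.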
if (unformat (input, "label %d", &label)) + label_set = 1; + else if (unformat (input, "s-bit-clear")) + s_bit = 0; + } + + if (!label_set) + return clib_error_return (0, "label not set"); + + rv = vnet_mpls_add_del_decap (rx_fib_id, + tx_fib_id /* not interesting */, + label, s_bit, + 0 /* next_index not interesting */, + 0 /* is_add */); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "no such rx fib id %d", rx_fib_id); + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "no such lookup fib id %d", tx_fib_id); + + case VNET_API_ERROR_NO_SUCH_LABEL: + return clib_error_return (0, "no such label %d rx fib id %d", + label, rx_fib_id); + + default: + return clib_error_return (0, "vnet_mpls_add_del_decap returned %d", + rv); + } + return 0; +} + + +VLIB_CLI_COMMAND (mpls_del_decap_command, static) = { + .path = "mpls decap delete", + .short_help = "mpls decap delete label <label> rx-fib <id> [s-bit-clear]", + .function = mpls_del_decap_command_fn, +}; + +typedef struct { + u32 fib_index; + u32 entry_index; + u32 dest; + u32 s_bit; + u32 label; +} show_mpls_fib_t; + +static clib_error_t * +show_mpls_fib_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u64 key; + u32 value; + show_mpls_fib_t *records = 0; + show_mpls_fib_t *s; + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + ip4_fib_t * rx_fib, * tx_fib; + u32 tx_table_id; + char *swif_tag; + + hash_foreach (key, value, mm->mpls_encap_by_fib_and_dest, + ({ + vec_add2 (records, s, 1); + s->fib_index = (u32)(key>>32); + s->dest = (u32)(key & 0xFFFFFFFF); + s->entry_index = (u32) value; + })); + + if (!vec_len(records)) + { + vlib_cli_output (vm, "MPLS encap table empty"); + goto decap_table; + } + /* sort output by dst address within fib */ + vec_sort (records, r0, r1, clib_net_to_host_u32(r0->dest) - + clib_net_to_host_u32(r1->dest)); + vec_sort (records, r0, r1, r0->fib_index - r1->fib_index); + vlib_cli_output (vm, "MPLS encap table"); + vlib_cli_output (vm, "%=6s%=16s%=16s", "Table", "Dest address", "Labels"); + vec_foreach (s, records) + { + rx_fib = vec_elt_at_index (im->fibs, s->fib_index); + vlib_cli_output (vm, "%=6d%=16U%=16U", rx_fib->table_id, + format_ip4_address, &s->dest, + format_mpls_encap_index, mm, s->entry_index); + } + + decap_table: + vec_reset_length(records); + + hash_foreach (key, value, mm->mpls_decap_by_rx_fib_and_label, + ({ + vec_add2 (records, s, 1); + s->fib_index = (u32)(key>>32); + s->entry_index = (u32) value; + s->label = ((u32) key)>>12; + s->s_bit = (key & (1<<8)) != 0; + })); + + if (!vec_len(records)) + { + vlib_cli_output (vm, "MPLS decap table empty"); + goto out; + } + + vec_sort (records, r0, r1, r0->label - r1->label); + + vlib_cli_output (vm, "MPLS decap table"); + vlib_cli_output (vm, "%=10s%=15s%=6s%=6s", "RX Table", "TX Table/Intfc", + "Label", "S-bit"); + vec_foreach (s, records) + { + mpls_decap_t * d; + d = pool_elt_at_index (mm->decaps, s->entry_index); + if (d->next_index == MPLS_INPUT_NEXT_IP4_INPUT) + { + tx_fib = vec_elt_at_index (im->fibs, d->tx_fib_index); + tx_table_id = tx_fib->table_id; + swif_tag = " "; + } + else + { + tx_table_id = d->tx_fib_index; + swif_tag = "(i) "; + } + rx_fib = vec_elt_at_index (im->fibs, s->fib_index); + + vlib_cli_output (vm, "%=10d%=10d%=5s%=6d%=6d", rx_fib->table_id, + tx_table_id, swif_tag, s->label, s->s_bit); + } + + out: + vec_free(records); + return 0; +} + +VLIB_CLI_COMMAND (show_mpls_fib_command, static) = { + 
.path = "show mpls fib", + .short_help = "show mpls fib", + .function = show_mpls_fib_command_fn, +}; + +int mpls_fib_reset_labels (u32 fib_id) +{ + u64 key; + u32 value; + show_mpls_fib_t *records = 0; + show_mpls_fib_t *s; + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + u32 fib_index; + uword *p; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_index = p[0]; + + hash_foreach (key, value, mm->mpls_encap_by_fib_and_dest, + ({ + if (fib_index == (u32)(key>>32)) { + vec_add2 (records, s, 1); + s->dest = (u32)(key & 0xFFFFFFFF); + s->entry_index = (u32) value; + } + })); + + vec_foreach (s, records) + { + key = ((u64)fib_index<<32) | ((u64) s->dest); + hash_unset (mm->mpls_encap_by_fib_and_dest, key); + pool_put_index (mm->encaps, s->entry_index); + } + + vec_reset_length(records); + + hash_foreach (key, value, mm->mpls_decap_by_rx_fib_and_label, + ({ + if (fib_index == (u32) (key>>32)) { + vec_add2 (records, s, 1); + s->entry_index = value; + s->fib_index = fib_index; + s->s_bit = key & (1<<8); + s->dest = (u32)((key & 0xFFFFFFFF)>>12); + } + })); + + vec_foreach (s, records) + { + key = ((u64)fib_index <<32) | ((u64)(s->dest<<12)) | + ((u64)s->s_bit); + + hash_unset (mm->mpls_decap_by_rx_fib_and_label, key); + pool_put_index (mm->decaps, s->entry_index); + } + + vec_free(records); + return 0; +} + +static clib_error_t * mpls_init (vlib_main_t * vm) +{ + mpls_main_t * mm = &mpls_main; + clib_error_t * error; + + memset (mm, 0, sizeof (mm[0])); + mm->vlib_main = vm; + mm->vnet_main = vnet_get_main(); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + + mm->mpls_encap_by_fib_and_dest = hash_create (0, sizeof (uword)); + mm->mpls_decap_by_rx_fib_and_label = hash_create (0, sizeof (uword)); + + return vlib_call_init_function (vm, mpls_input_init); +} + +VLIB_INIT_FUNCTION (mpls_init); + +mpls_main_t * mpls_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, mpls_init); + return &mpls_main; +} + diff --git a/vnet/vnet/mpls-gre/mpls.h b/vnet/vnet/mpls-gre/mpls.h new file mode 100644 index 00000000000..5d7f9c5e219 --- /dev/null +++ b/vnet/vnet/mpls-gre/mpls.h @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_mpls_gre_h +#define included_vnet_mpls_gre_h + +#include <vnet/vnet.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls-gre/packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ethernet/ethernet.h> + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; /* 20 bytes */ + gre_header_t gre; /* 4 bytes */ + mpls_unicast_header_t labels[0]; /* 4 bytes each */ +}) ip4_gre_and_mpls_header_t; + +vnet_hw_interface_class_t mpls_gre_hw_interface_class; + +typedef enum { +#define mpls_error(n,s) MPLS_ERROR_##n, +#include <vnet/mpls-gre/error.def> +#undef mpls_error + MPLS_N_ERROR, +} mpls_gre_error_t; + +/* + * No protocol info, MPLS labels don't have a next-header field + * presumably the label field tells all... + */ + +typedef struct { + ip4_address_t tunnel_src; + ip4_address_t tunnel_dst; + ip4_address_t intfc_address; + u32 mask_width; + u32 inner_fib_index; + u32 outer_fib_index; + u32 encap_index; + u32 hw_if_index; /* L2 x-connect capable tunnel intfc */ + u8 * rewrite_data; + u8 l2_only; +} mpls_gre_tunnel_t; + +typedef struct { + u8 tunnel_dst[6]; + ip4_address_t intfc_address; + u32 tx_sw_if_index; + u32 inner_fib_index; + u32 mask_width; + u32 encap_index; + u32 hw_if_index; + u8 * rewrite_data; + u8 l2_only; +} mpls_eth_tunnel_t; + +typedef struct { + mpls_unicast_header_t *labels; + /* only for policy tunnels */ + u8 * rewrite; + u32 output_next_index; +} mpls_encap_t; + +typedef struct { + u32 tx_fib_index; + u32 next_index; /* e.g. ip4/6-input, l2-input */ +} mpls_decap_t; + +typedef struct { + /* pool of gre tunnel instances */ + mpls_gre_tunnel_t *gre_tunnels; + u32 * free_gre_sw_if_indices; + + /* pool of ethernet tunnel instances */ + mpls_eth_tunnel_t *eth_tunnels; + u32 * free_eth_sw_if_indices; + + /* Encap side: map (fib, dst_address) to mpls label stack */ + mpls_encap_t * encaps; + uword * mpls_encap_by_fib_and_dest; + + /* Decap side: map rx label to FIB */ + mpls_decap_t * decaps; + uword * mpls_decap_by_rx_fib_and_label; + + /* mpls-o-e policy tunnel next index for ip4-classify */ + u32 ip_classify_mpls_policy_encap_next_index; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} mpls_main_t; + +mpls_main_t mpls_main; + +format_function_t format_mpls_protocol; +format_function_t format_mpls_header; +format_function_t format_mpls_header_with_length; +format_function_t format_mpls_gre_header_with_length; +format_function_t format_mpls_eth_header_with_length; +format_function_t format_mpls_unicast_label; +format_function_t format_mpls_encap_index; + +vlib_node_registration_t mpls_input_node; +vlib_node_registration_t mpls_policy_encap_node; + +vnet_device_class_t mpls_gre_device_class; + +/* Parse mpls protocol as 0xXXXX or protocol name. + In either host or network byte order. */ +unformat_function_t unformat_mpls_protocol_host_byte_order; +unformat_function_t unformat_mpls_protocol_net_byte_order; +unformat_function_t unformat_mpls_label_net_byte_order; +unformat_function_t unformat_mpls_gre_header; +unformat_function_t unformat_pg_mpls_gre_header; + +/* Parse mpls header. 
*/ +unformat_function_t unformat_mpls_header; +unformat_function_t unformat_pg_mpls_header; + +/* manually added to the interface output node in mpls.c */ +#define MPLS_GRE_OUTPUT_NEXT_LOOKUP 1 +#define MPLS_GRE_OUTPUT_NEXT_DROP VNET_INTERFACE_TX_NEXT_DROP + +mpls_encap_t * +mpls_encap_by_fib_and_dest (mpls_main_t * mm, u32 rx_fib, u32 dst_address); + +int mpls_label_from_fib_id_and_dest (mpls_main_t *gm, u32 fib_id, + u32 dst_address, u32 *labelp); + +int vnet_mpls_gre_add_del_tunnel (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, u32 outer_fib_id, + u32 * tunnel_intfc_sw_if_index, + u8 l2_only, + u8 is_add); + +int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add); + +int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id); + +int mpls_fib_reset_labels (u32 fib_id); + +int vnet_mpls_add_del_decap (u32 rx_fib_id, + u32 tx_fib_id, + u32 label_host_byte_order, + int s_bit, int next_index, int is_add); + +int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id, + u32 *labels_host_byte_order, + u32 policy_tunnel_index, + int no_dst_hash, u32 * indexp, int is_add); + +int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm, + mpls_encap_t * e, + u32 policy_tunnel_index); +typedef struct { + u32 lookup_miss; + + /* Tunnel-id / index in tunnel vector */ + u32 tunnel_id; + + /* mpls encap index */ + u32 mpls_encap_index; + + /* pkt length */ + u32 length; + + /* tunnel ip4 addresses */ + ip4_address_t src; + ip4_address_t dst; +} mpls_gre_tx_trace_t; + +u8 * format_mpls_gre_tx_trace (u8 * s, va_list * args); +u8 * format_mpls_gre_header (u8 * s, va_list * args); + +#define foreach_mpls_input_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(L2_OUTPUT, "l2-output") + +typedef enum { +#define _(s,n) MPLS_INPUT_NEXT_##s, + foreach_mpls_input_next +#undef _ + MPLS_INPUT_N_NEXT, +} mpls_input_next_t; + + +typedef struct { + u32 lookup_miss; + + /* Tunnel-id / index in tunnel vector */ + u32 tunnel_id; + + /* output interface */ + u32 tx_sw_if_index; + + /* mpls encap index */ + u32 mpls_encap_index; + + /* pkt length */ + u32 length; + + u8 dst[6]; +} mpls_eth_tx_trace_t; + +u8 * format_mpls_eth_tx_trace (u8 * s, va_list * args); + +#endif /* included_vnet_mpls_gre_h */ diff --git a/vnet/vnet/mpls-gre/node.c b/vnet/vnet/mpls-gre/node.c new file mode 100644 index 00000000000..6bf5f814aec --- /dev/null +++ b/vnet/vnet/mpls-gre/node.c @@ -0,0 +1,359 @@ +/* + * node.c: mpls-o-gre decap processing + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls-gre/mpls.h> + +typedef struct { + u32 next_index; + u32 decap_index; + u32 tx_fib_index; + u32 label_host_byte_order; +} mpls_rx_trace_t; + +u8 * format_mpls_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_rx_trace_t * t = va_arg (*args, mpls_rx_trace_t *); + char * next_name; + + next_name = "BUG!"; + +#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b; + foreach_mpls_input_next; +#undef _ + + s = format (s, "MPLS: next %s, lookup fib index %d, decap index %d\n", + next_name, t->next_index, t->tx_fib_index, t->decap_index); + if (t->decap_index != ~0) + { + s = format (s, " label %d", + vnet_mpls_uc_get_label(t->label_host_byte_order)); + } + return s; +} + +vlib_node_registration_t mpls_input_node; + +typedef struct { + u32 last_label; + u32 last_inner_fib_index; + u32 last_outer_fib_index; + mpls_main_t * mpls_main; +} mpls_input_runtime_t; + +static inline uword +mpls_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_mpls_o_gre) +{ + u32 n_left_from, next_index, * from, * to_next; + ip4_main_t * im = &ip4_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + mpls_input_runtime_t * rt; + mpls_main_t * mm; + + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + mm = rt->mpls_main; + /* + * Force an initial lookup every time, in case the control-plane + * changed the label->FIB mapping. + */ + rt->last_label = ~0; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + mpls_unicast_header_t * h0, * h1; + int li0, li1; + u64 key0, key1; + u32 label0, label1; + u32 next0, next1; + uword * p0, * p1; + u32 fib_index0, fib_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* $$$$$ dual loop me */ + + vlib_buffer_advance (b0, sizeof (*h0)); + vlib_buffer_advance (b1, sizeof (*h1)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + mpls_unicast_header_t * h0; + u32 label0; + u32 next0; + u64 key0; + uword * p0; + u32 rx_fib_index0; + mpls_decap_t *d0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + if (is_mpls_o_gre) + { + rx_fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + } + else + { +#if 0 + /* If separate RX numbering spaces are required... 
*/ + rx_fib_index0 = vec_elt (mm->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); +#endif + rx_fib_index0 = 0; + } + + next0 = ~0; + d0 = 0; + + /* + * Expect the control-plane team to squeal like pigs. + * If they don't program a decap label entry for each + * and every label in the stack, packets go into the trash... + */ + + do + { + label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); + /* TTL expired? */ + if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) + { + next0 = MPLS_INPUT_NEXT_DROP; + b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; + break; + } + + key0 = ((u64)rx_fib_index0<<32) + | ((u64)vnet_mpls_uc_get_label (label0)<<12) + | ((u64)vnet_mpls_uc_get_s (label0)<<8); + + /* + * The architecture crew claims that we won't need + * separate ip4, ip6, mpls-o-ethernet label numbering + * spaces. Use the low 8 key bits as a discriminator. + */ + + p0 = hash_get (mm->mpls_decap_by_rx_fib_and_label, key0); + if (p0 == 0) + { + next0 = MPLS_INPUT_NEXT_DROP; + b0->error = node->errors[MPLS_ERROR_BAD_LABEL]; + break; + } + d0 = pool_elt_at_index (mm->decaps, p0[0]); + next0 = d0->next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = d0->tx_fib_index; + vlib_buffer_advance (b0, sizeof (*h0)); + h0 = vlib_buffer_get_current (b0); + } while (!vnet_mpls_uc_get_s(label0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->decap_index = d0 ? d0 - mm->decaps : ~0; + tr->tx_fib_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + tr->label_host_byte_order = label0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static uword +mpls_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return mpls_input_inline (vm, node, from_frame, 1 /* is mpls-o-gre */); +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_input_node) = { + .function = mpls_input, + .name = "mpls-gre-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof(mpls_input_runtime_t), + + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, + foreach_mpls_input_next +#undef _ + }, + + .format_buffer = format_mpls_gre_header_with_length, + .format_trace = format_mpls_rx_trace, + .unformat_buffer = unformat_mpls_gre_header, +}; + +static uword +mpls_ethernet_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return mpls_input_inline (vm, node, from_frame, 0 /* is mpls-o-gre */); +} + + +VLIB_REGISTER_NODE (mpls_ethernet_input_node) = { + .function = mpls_ethernet_input, + .name = "mpls-ethernet-input", + /* Takes a vector of packets. 
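The do/while loop in mpls_input_inline pops labels one at a time, dropping the packet on a zero TTL and stopping once the bottom-of-stack bit is seen, with a decap hash lookup for every label on the way down. A compressed standalone model of that walk, without the buffer or hash plumbing (types and names invented):

#include <stdint.h>
#include <stdio.h>

/* Walk a received label stack until the S (bottom-of-stack) bit is set.
 * Returns the number of labels consumed, or -1 on an expired TTL. */
static int
walk_label_stack (const uint32_t *stack, int max_labels)
{
  int i;
  for (i = 0; i < max_labels; i++)
    {
      uint32_t label_exp_s_ttl = stack[i];    /* host byte order here */
      if ((label_exp_s_ttl & 0xff) == 0)      /* TTL expired: drop */
        return -1;
      if ((label_exp_s_ttl >> 8) & 1)         /* S bit: last label */
        return i + 1;
    }
  return max_labels;
}

int main (void)
{
  uint32_t stack[2] = { (200u << 12) | 0xff,               /* not bottom of stack */
                        (100u << 12) | (1 << 8) | 0xff };  /* bottom of stack */
  printf ("consumed %d labels\n", walk_label_stack (stack, 2));   /* 2 */
  return 0;
}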
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof(mpls_input_runtime_t), + + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, + foreach_mpls_input_next +#undef _ + }, + + .format_buffer = format_mpls_eth_header_with_length, + .format_trace = format_mpls_rx_trace, + .unformat_buffer = unformat_mpls_gre_header, +}; + +static void +mpls_setup_nodes (vlib_main_t * vm) +{ + vlib_node_t * n = vlib_get_node (vm, mpls_input_node.index); + pg_node_t * pn = pg_get_node (mpls_input_node.index); + mpls_input_runtime_t * rt; + + n->format_buffer = format_mpls_gre_header_with_length; + n->unformat_buffer = unformat_mpls_gre_header; + pn->unformat_edit = unformat_pg_mpls_header; + + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + rt->last_label = (u32) ~0; + rt->last_inner_fib_index = 0; + rt->last_outer_fib_index = 0; + rt->mpls_main = &mpls_main; + + n = vlib_get_node (vm, mpls_ethernet_input_node.index); + + n->format_buffer = format_mpls_eth_header_with_length; + + n->unformat_buffer = 0; /* unformat_mpls_ethernet_header; */ + + rt = vlib_node_get_runtime_data (vm, mpls_ethernet_input_node.index); + rt->last_label = (u32) ~0; + rt->last_inner_fib_index = 0; + rt->last_outer_fib_index = 0; + rt->mpls_main = &mpls_main; + + ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, + mpls_ethernet_input_node.index); +} + +static clib_error_t * mpls_input_init (vlib_main_t * vm) +{ + clib_error_t * error; + + error = vlib_call_init_function (vm, mpls_init); + if (error) + clib_error_report (error); + + mpls_setup_nodes (vm); + + return 0; +} + +VLIB_INIT_FUNCTION (mpls_input_init); diff --git a/vnet/vnet/mpls-gre/packet.h b/vnet/vnet/mpls-gre/packet.h new file mode 100644 index 00000000000..baa01818f09 --- /dev/null +++ b/vnet/vnet/mpls-gre/packet.h @@ -0,0 +1,49 @@ +#ifndef included_vnet_mpls_packet_h +#define included_vnet_mpls_packet_h + +/* + * MPLS packet format + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +typedef struct { + /* Label: top 20 bits [in network byte order] */ + /* Experimental: 3 bits ... 
*/ + /* S (bottom of label stack): 1 bit */ + /* TTL: 8 bits */ + u32 label_exp_s_ttl; +} mpls_unicast_header_t; + +static inline u32 vnet_mpls_uc_get_label (u32 label_exp_s_ttl) +{ + return (label_exp_s_ttl>>12); +} + +static inline u32 vnet_mpls_uc_get_exp (u32 label_exp_s_ttl) +{ + return ((label_exp_s_ttl>>9) & 0x7); +} + +static inline u32 vnet_mpls_uc_get_s (u32 label_exp_s_ttl) +{ + return ((label_exp_s_ttl>>8) & 0x1); +} + +static inline u32 vnet_mpls_uc_get_ttl (u32 label_exp_s_ttl) +{ + return (label_exp_s_ttl & 0xff); +} + +#endif /* included_vnet_mpls_packet_h */ diff --git a/vnet/vnet/mpls-gre/pg.c b/vnet/vnet/mpls-gre/pg.c new file mode 100644 index 00000000000..6b6a1017c58 --- /dev/null +++ b/vnet/vnet/mpls-gre/pg.c @@ -0,0 +1,71 @@ +/* + * pg.c: packet generator mpls/gre interface + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls-gre/mpls.h> + +typedef struct { + pg_edit_t label; +} pg_mpls_header_t; + +static inline void +pg_mpls_header_init (pg_mpls_header_t * e) +{ + pg_edit_init (&e->label, mpls_unicast_header_t, label_exp_s_ttl); +} + +uword +unformat_pg_mpls_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_mpls_header_t * h; + vlib_main_t * vm = vlib_get_main(); + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (mpls_unicast_header_t), + &group_index); + pg_mpls_header_init (h); + + error = 1; + if (! unformat (input, "%U", + unformat_pg_edit, + unformat_mpls_label_net_byte_order, &h->label)) + goto done; + + { + pg_node_t * pg_node = 0; + vlib_node_t * ip_lookup_node; + + ip_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ip4-input"); + ASSERT (ip_lookup_node); + + pg_node = pg_get_node (ip_lookup_node->index); + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/mpls-gre/policy_encap.c b/vnet/vnet/mpls-gre/policy_encap.c new file mode 100644 index 00000000000..53411515e69 --- /dev/null +++ b/vnet/vnet/mpls-gre/policy_encap.c @@ -0,0 +1,172 @@ +/* + * policy_encap.c: mpls-o-e policy encap + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls-gre/mpls.h> + +typedef struct { + u32 next_index; + u32 encap_index; +} mpls_policy_encap_trace_t; + +u8 * format_mpls_policy_encap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_policy_encap_trace_t * t = va_arg (*args, mpls_policy_encap_trace_t *); + + s = format (s, "MPLS-POLICY-ENCAP: next-index %d encap-index %d", + t->next_index, t->encap_index); + + return s; +} + +vlib_node_registration_t mpls_policy_encap_node; + +#define foreach_mpls_policy_encap_next \ +_(DROP, "error-drop") + +typedef enum { +#define _(s,n) MPLS_POLICY_ENCAP_NEXT_##s, + foreach_mpls_policy_encap_next +#undef _ + MPLS_POLICY_ENCAP_N_NEXT, +} mpls_policy_encap_next_t; + +#define foreach_mpls_policy_error \ +_(PKTS_ENCAP, "mpls policy tunnel packets encapsulated") + +typedef enum { +#define _(n,s) MPLS_POLICY_ENCAP_ERROR_##n, + foreach_mpls_policy_error + MPLS_POLICY_ENCAP_N_ERROR, +#undef _ +} mpls_policy_encap_error_t; + +static char * mpls_policy_encap_error_strings[] = + { +#define _(n,s) s, + foreach_mpls_policy_error +#undef _ +}; + +static uword +mpls_policy_encap (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + mpls_main_t * mm = &mpls_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u8 * h0; + u32 encap_index0; + u32 next0; + mpls_encap_t * e0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + encap_index0 = vnet_buffer(b0)->l2_classify.opaque_index; + + e0 = pool_elt_at_index (mm->encaps, encap_index0); + + vlib_buffer_advance (b0, -(word)vec_len(e0->rewrite)); + h0 = vlib_buffer_get_current (b0); + memcpy (h0, e0->rewrite, vec_len(e0->rewrite)); + + next0 = e0->output_next_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_policy_encap_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->encap_index = encap_index0; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_policy_encap_node.index, + MPLS_POLICY_ENCAP_ERROR_PKTS_ENCAP, + from_frame->n_vectors); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (mpls_policy_encap_node) = { + .function = mpls_policy_encap, + .name = "mpls-policy-encap", + /* Takes a vector of packets. 
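mpls-policy-encap recovers the encap index from the classifier's opaque_index, then pushes the precomputed rewrite by moving the buffer's current data pointer backwards and copying the rewrite into the space opened up. A toy model of that push pattern (struct and field names invented; the real code uses vlib_buffer_advance with a negative length):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Headroom in front of 'current' is assumed to be available. */
typedef struct {
  uint8_t data[256];
  size_t  current;       /* offset of current data in 'data' */
  size_t  length;        /* bytes of payload starting at 'current' */
} toy_buffer_t;

static void
push_rewrite (toy_buffer_t *b, const uint8_t *rewrite, size_t rewrite_len)
{
  b->current -= rewrite_len;                 /* advance backwards */
  b->length  += rewrite_len;
  memcpy (b->data + b->current, rewrite, rewrite_len);
}

int main (void)
{
  toy_buffer_t b = { .current = 64, .length = 100 };
  uint8_t rewrite[18] = { 0 };               /* L2 header + one MPLS label */

  push_rewrite (&b, rewrite, sizeof rewrite);
  printf ("payload now starts at %zu, %zu bytes\n", b.current, b.length);
  return 0;
}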
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = 0, + + .n_errors = MPLS_POLICY_ENCAP_N_ERROR, + .error_strings = mpls_policy_encap_error_strings, + + .format_trace = format_mpls_policy_encap_trace, + + .n_next_nodes = MPLS_POLICY_ENCAP_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_POLICY_ENCAP_NEXT_##s] = n, + foreach_mpls_policy_encap_next +#undef _ + }, +}; + +static clib_error_t * +mpls_policy_encap_init (vlib_main_t * vm) +{ + mpls_main_t * mm = &mpls_main; + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, mpls_init))) + return error; + + mm->ip_classify_mpls_policy_encap_next_index = + vlib_node_add_next (mm->vlib_main, + ip4_classify_node.index, + mpls_policy_encap_node.index); + return 0; +} + +VLIB_INIT_FUNCTION (mpls_policy_encap_init); diff --git a/vnet/vnet/nsh-gre/decap.c b/vnet/vnet/nsh-gre/decap.c new file mode 100644 index 00000000000..9ef52ffb6de --- /dev/null +++ b/vnet/vnet/nsh-gre/decap.c @@ -0,0 +1,365 @@ +/* + * nsh.c: nsh packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/nsh-gre/nsh_gre.h> +#include <vnet/nsh-gre/nsh_gre_packet.h> + +vlib_node_registration_t nsh_input_node; + +typedef struct { + u32 next_index; + u32 tunnel_index; + u32 error; + nsh_header_t h; +} nsh_rx_trace_t; + + +u8 * format_nsh_header_with_length (u8 * s, va_list * args) +{ + nsh_header_t * h = va_arg (*args, nsh_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 tmp, header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "gre-nsh header truncated"); + + s = format (s, "ver %d ", h->ver_o_c>>6); + + if (h->ver_o_c & NSH_GRE_O_BIT) + s = format (s, "O-set "); + + if (h->ver_o_c & NSH_GRE_C_BIT) + s = format (s, "C-set "); + + s = format (s, "len %d (%d bytes) md_type %d next_protocol %d\n", + h->length, h->length * 4, h->md_type, h->next_protocol); + + tmp = clib_net_to_host_u32 (h->spi_si); + + s = format (s, " spi %d si %d ", + (tmp>>NSH_GRE_SPI_SHIFT) & NSH_GRE_SPI_MASK, + tmp & NSH_GRE_SINDEX_MASK); + + s = format (s, "c1 %u c2 %u c3 %u c4 %u", + clib_net_to_host_u32 (h->c1), + clib_net_to_host_u32 (h->c2), + clib_net_to_host_u32 (h->c3), + clib_net_to_host_u32 (h->c4)); + + return s; +} + + +u8 * format_nsh_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nsh_rx_trace_t * t = va_arg (*args, nsh_rx_trace_t *); + + if (t->tunnel_index != ~0) + { + s = format (s, "NSH: tunnel %d next %d error %d", t->tunnel_index, + t->next_index, t->error); + } + else + { + s = format (s, "NSH: no tunnel next %d error %d\n", t->next_index, + t->error); + } + s = format (s, "\n %U", format_nsh_header_with_length, &t->h, + (u32) sizeof (t->h) /* max size */); + return s; +} + +static uword +nsh_gre_input (vlib_main_t * 
vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + nsh_gre_main_t * ngm = &nsh_gre_main; + u32 last_tunnel_index = ~0; + u64 last_key = ~0ULL; + u32 pkts_decapsulated = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + nsh_header_t * h0, * h1; + uword * p0, * p1; + u32 tunnel_index0, tunnel_index1; + nsh_gre_tunnel_t * t0, * t1; + u64 key0, key1; + u32 error0, error1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + /* gre stashed the src ip4 address for us... */ + key0 = (((u64)(vnet_buffer(b0)->gre.src))<<32) | h0->spi_si; + key1 = (((u64)(vnet_buffer(b1)->gre.src))<<32) | h1->spi_si; + + /* "pop" nsh header */ + vlib_buffer_advance (b0, sizeof (*h0)); + vlib_buffer_advance (b1, sizeof (*h1)); + + tunnel_index0 = ~0; + tunnel_index1 = ~0; + error0 = 0; + error1 = 0; + next0 = NSH_INPUT_NEXT_DROP; + next1 = NSH_INPUT_NEXT_DROP; + + if (PREDICT_FALSE(key0 != last_key)) + { + p0 = hash_get (ngm->nsh_gre_tunnel_by_src_address, key0); + + if (p0 == 0) + { + error0 = NSH_GRE_ERROR_NO_SUCH_TUNNEL; + goto trace0; + } + + last_key = key0; + tunnel_index0 = last_tunnel_index = p0[0]; + } + else + tunnel_index0 = last_tunnel_index; + + t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b0); + + next0 = t0->decap_next_index; + + /* ip[46] lookup in the configured FIB, otherwise an opaque */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index; + + trace0: + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = tunnel_index0; + tr->h = h0[0]; + } + + if (PREDICT_FALSE(key1 != last_key)) + { + p1 = hash_get (ngm->nsh_gre_tunnel_by_src_address, key1); + + if (p1 == 0) + { + error1 = NSH_GRE_ERROR_NO_SUCH_TUNNEL; + goto trace1; + } + + last_key = key1; + tunnel_index1 = last_tunnel_index = p1[0]; + } + else + tunnel_index1 = last_tunnel_index; + + t1 = pool_elt_at_index (ngm->tunnels, tunnel_index1); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b1); + + next1 = t1->decap_next_index; + + /* ip[46] lookup in the configured FIB, otherwise an opaque */ + vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index; + + pkts_decapsulated +=2; + + trace1: + b1->error = error1 ? 
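Both the dual and the single loop in nsh_gre_input key the tunnel lookup on the GRE source address stashed by gre-input combined with the NSH spi_si word, and keep a one-entry cache (last_key / last_tunnel_index) so back-to-back packets from the same tunnel skip the hash probe. A standalone model of that cache (the lookup function below is a stand-in for the hash_get on nsh_gre_tunnel_by_src_address):

#include <stdint.h>
#include <stdio.h>

static uint32_t
slow_lookup (uint64_t key)
{
  return (uint32_t) (key & 0xffff);   /* placeholder for the hash probe */
}

typedef struct {
  uint64_t last_key;
  uint32_t last_tunnel_index;
} tunnel_cache_t;

static uint32_t
cached_lookup (tunnel_cache_t *c, uint32_t gre_src, uint32_t spi_si)
{
  uint64_t key = ((uint64_t) gre_src << 32) | spi_si;
  if (key != c->last_key)
    {
      c->last_tunnel_index = slow_lookup (key);
      c->last_key = key;
    }
  return c->last_tunnel_index;
}

int main (void)
{
  tunnel_cache_t c = { .last_key = ~0ULL };
  printf ("%u\n", cached_lookup (&c, 0x0a000001, 0x100));
  printf ("%u\n", cached_lookup (&c, 0x0a000001, 0x100));   /* cache hit */
  return 0;
}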
node->errors[error1] : 0; + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->next_index = next1; + tr->error = error1; + tr->tunnel_index = tunnel_index1; + tr->h = h1[0]; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + nsh_header_t * h0; + uword * p0; + u32 tunnel_index0; + nsh_gre_tunnel_t * t0; + u64 key0; + u32 error0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + /* gre stashed the src ip4 address for us... */ + key0 = (((u64)(vnet_buffer(b0)->gre.src))<<32) | h0->spi_si; + + /* "pop" nsh header */ + vlib_buffer_advance (b0, sizeof (*h0)); + + tunnel_index0 = ~0; + error0 = 0; + next0 = NSH_INPUT_NEXT_DROP; + + if (PREDICT_FALSE(key0 != last_key)) + { + p0 = hash_get (ngm->nsh_gre_tunnel_by_src_address, key0); + + if (p0 == 0) + { + error0 = NSH_GRE_ERROR_NO_SUCH_TUNNEL; + goto trace00; + } + + last_key = key0; + tunnel_index0 = last_tunnel_index = p0[0]; + } + else + tunnel_index0 = last_tunnel_index; + + t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b0); + + next0 = t0->decap_next_index; + + /* ip[46] lookup in the configured FIB, otherwise an opaque */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index; + pkts_decapsulated ++; + + trace00: + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = tunnel_index0; + tr->h = h0[0]; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, nsh_gre_input_node.index, + NSH_GRE_ERROR_DECAPSULATED, + pkts_decapsulated); + return from_frame->n_vectors; +} + +static char * nsh_error_strings[] = { +#define nsh_gre_error(n,s) s, +#include <vnet/nsh-gre/nsh_gre_error.def> +#undef nsh_gre_error +#undef _ +}; + +VLIB_REGISTER_NODE (nsh_gre_input_node) = { + .function = nsh_gre_input, + .name = "nsh-gre-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = NSH_GRE_N_ERROR, + .error_strings = nsh_error_strings, + + .n_next_nodes = NSH_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [NSH_INPUT_NEXT_##s] = n, + foreach_nsh_gre_input_next +#undef _ + }, + + .format_buffer = format_nsh_header_with_length, + .format_trace = format_nsh_rx_trace, + // $$$$ .unformat_buffer = unformat_nsh_gre_header, +}; diff --git a/vnet/vnet/nsh-gre/encap.c b/vnet/vnet/nsh-gre/encap.c new file mode 100644 index 00000000000..875e8311098 --- /dev/null +++ b/vnet/vnet/nsh-gre/encap.c @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/nsh-gre/nsh_gre.h> + +/* Statistics (not really errors) */ +#define foreach_nsh_gre_encap_error \ +_(ENCAPSULATED, "good packets encapsulated") + +static char * nsh_gre_encap_error_strings[] = { +#define _(sym,string) string, + foreach_nsh_gre_encap_error +#undef _ +}; + +typedef enum { +#define _(sym,str) NSH_GRE_ENCAP_ERROR_##sym, + foreach_nsh_gre_encap_error +#undef _ + NSH_GRE_ENCAP_N_ERROR, +} nsh_gre_encap_error_t; + +typedef enum { + NSH_GRE_ENCAP_NEXT_IP4_LOOKUP, + NSH_GRE_ENCAP_NEXT_DROP, + NSH_GRE_ENCAP_N_NEXT, +} nsh_gre_encap_next_t; + +typedef struct { + u32 tunnel_index; +} nsh_gre_encap_trace_t; + +u8 * format_nsh_gre_encap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nsh_gre_encap_trace_t * t = va_arg (*args, nsh_gre_encap_trace_t *); + + s = format (s, "NSH-GRE-ENCAP: tunnel %d", t->tunnel_index); + return s; +} + +static uword +nsh_gre_encap (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + nsh_gre_main_t * ngm = &nsh_gre_main; + vnet_main_t * vnm = ngm->vnet_main; + u32 pkts_encapsulated = 0; + u16 old_l0 = 0, old_l1 = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0 = NSH_GRE_ENCAP_NEXT_IP4_LOOKUP; + u32 next1 = NSH_GRE_ENCAP_NEXT_IP4_LOOKUP; + vnet_hw_interface_t * hi0, * hi1; + ip4_header_t * ip0, * ip1; + u64 * copy_src0, * copy_dst0; + u64 * copy_src1, * copy_dst1; + nsh_gre_tunnel_t * t0, * t1; + u16 new_l0, new_l1; + ip_csum_t sum0, sum1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + hi0 = vnet_get_sup_hw_interface + (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]); + hi1 = vnet_get_sup_hw_interface + (vnm, vnet_buffer(b1)->sw_if_index[VLIB_TX]); + + t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance); + t1 = pool_elt_at_index (ngm->tunnels, hi1->dev_instance); + + ASSERT(vec_len(t0->rewrite) >= 24); + ASSERT(vec_len(t1->rewrite) >= 24); + + /* Apply the rewrite string. $$$$ vnet_rewrite? 
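+           *
+           * The rewrite vector is pre-built at tunnel-add time by
+           * nsh_gre_rewrite(): the complete outer ip4 + GRE + NSH header
+           * plus any metadata TLVs.  Advancing the buffer by
+           * -vec_len(rewrite) opens space in front of the payload; the
+           * first 24 bytes are copied with three u64 stores, the remainder
+           * with memcpy, and only the outer ip4 length / checksum are
+           * fixed up per packet below.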
*/ + vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); + vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite)); + + ip0 = vlib_buffer_get_current(b0); + ip1 = vlib_buffer_get_current(b1); + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip0; + copy_src0 = (u64 *) t0->rewrite; + copy_dst1 = (u64 *) ip1; + copy_src1 = (u64 *) t1->rewrite; + + copy_dst0[0] = copy_src0[0]; + copy_dst0[1] = copy_src0[1]; + copy_dst0[2] = copy_src0[2]; + + copy_dst1[0] = copy_src1[0]; + copy_dst1[1] = copy_src1[1]; + copy_dst1[2] = copy_src1[2]; + + /* If there are TLVs to copy, do so */ + if (PREDICT_FALSE (_vec_len(t0->rewrite) > 24)) + memcpy (©_dst0[3], t0->rewrite + 24 , + _vec_len (t0->rewrite)-24); + + if (PREDICT_FALSE (_vec_len(t1->rewrite) > 24)) + memcpy (©_dst1[3], t1->rewrite + 24 , + _vec_len (t1->rewrite)-24); + + /* fix the <bleep>ing outer-IP checksums */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + sum1 = ip1->checksum; + /* old_l1 always 1, see the rewrite setup */ + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + ip1->length = new_l1; + + /* Reset to look up tunnel partner in the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index; + pkts_encapsulated += 2; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_gre_encap_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->tunnel_index = t0 - ngm->tunnels; + } + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_gre_encap_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->tunnel_index = t1 - ngm->tunnels; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = NSH_GRE_ENCAP_NEXT_IP4_LOOKUP; + vnet_hw_interface_t * hi0; + ip4_header_t * ip0; + u64 * copy_src0, * copy_dst0; + nsh_gre_tunnel_t * t0; + u16 new_l0; + ip_csum_t sum0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* 1-wide cache? */ + hi0 = vnet_get_sup_hw_interface + (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]); + + t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance); + + ASSERT(vec_len(t0->rewrite) >= 24); + + /* Apply the rewrite string. $$$$ vnet_rewrite? 
*/ + vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); + + ip0 = vlib_buffer_get_current(b0); + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip0; + copy_src0 = (u64 *) t0->rewrite; + copy_dst0[0] = copy_src0[0]; + copy_dst0[1] = copy_src0[1]; + copy_dst0[2] = copy_src0[2]; + + /* If there are TLVs to copy, do so */ + if (PREDICT_FALSE (_vec_len(t0->rewrite) > 24)) + memcpy (©_dst0[3], t0->rewrite + 24 , + _vec_len (t0->rewrite)-24); + + /* fix the <bleep>ing outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* Reset to look up tunnel partner in the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; + pkts_encapsulated ++; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_gre_encap_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->tunnel_index = t0 - ngm->tunnels; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, node->node_index, + NSH_GRE_ENCAP_ERROR_ENCAPSULATED, + pkts_encapsulated); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (nsh_gre_encap_node) = { + .function = nsh_gre_encap, + .name = "nsh-gre-encap", + .vector_size = sizeof (u32), + .format_trace = format_nsh_gre_encap_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(nsh_gre_encap_error_strings), + .error_strings = nsh_gre_encap_error_strings, + + .n_next_nodes = NSH_GRE_ENCAP_N_NEXT, + + // add dispositions here + .next_nodes = { + [NSH_GRE_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [NSH_GRE_ENCAP_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/nsh-gre/nsh_gre.c b/vnet/vnet/nsh-gre/nsh_gre.c new file mode 100644 index 00000000000..f85e71a2d43 --- /dev/null +++ b/vnet/vnet/nsh-gre/nsh_gre.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/l2/l2_input.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/nsh-gre/nsh_gre.h> + +nsh_gre_main_t nsh_gre_main; + +static u8 * format_decap_next (u8 * s, va_list * args) +{ + u32 next_index = va_arg (*args, u32); + + switch (next_index) + { + case NSH_INPUT_NEXT_DROP: + return format (s, "drop"); + case NSH_INPUT_NEXT_IP4_INPUT: + return format (s, "ip4"); + case NSH_INPUT_NEXT_IP6_INPUT: + return format (s, "ip6"); + default: + return format (s, "index %d", next_index); + } + return s; +} + + +u8 * format_nsh_gre_tunnel (u8 * s, va_list * args) +{ + nsh_gre_tunnel_t * t = va_arg (*args, nsh_gre_tunnel_t *); + nsh_gre_main_t * ngm = &nsh_gre_main; + + s = format (s, "[%d] %U (src) %U (dst) fibs: (encap %d, decap %d)", + t - ngm->tunnels, + format_ip4_address, &t->src, + format_ip4_address, &t->dst, + t->encap_fib_index, + t->decap_fib_index); + + s = format (s, " decap-next %U\n", format_decap_next, t->decap_next_index); + + s = format (s, " ver %d ", (t->ver_o_c>>6)); + if (t->ver_o_c & NSH_GRE_O_BIT) + s = format (s, "O-set "); + + if (t->ver_o_c & NSH_GRE_C_BIT) + s = format (s, "C-set "); + + s = format (s, "len %d (%d bytes) md_type %d next_protocol %d\n", + t->length, t->length * 4, t->md_type, t->next_protocol); + + s = format (s, " service path %d service index %d\n", + (t->spi_si>>NSH_GRE_SPI_SHIFT) & NSH_GRE_SPI_MASK, + t->spi_si & NSH_GRE_SINDEX_MASK); + + s = format (s, " c1 %d c2 %d c3 %d c4 %d\n", + t->c1, t->c2, t->c3, t->c4); + + return s; +} + +static u8 * format_nsh_gre_name (u8 * s, va_list * args) +{ + nsh_gre_main_t * ngm = &nsh_gre_main; + u32 i = va_arg (*args, u32); + u32 show_dev_instance = ~0; + + if (i < vec_len (ngm->dev_inst_by_real)) + show_dev_instance = ngm->dev_inst_by_real[i]; + + if (show_dev_instance != ~0) + i = show_dev_instance; + + return format (s, "nsh_gre_tunnel%d", i); +} + +static int nsh_gre_name_renumber (vnet_hw_interface_t * hi, + u32 new_dev_instance) +{ + nsh_gre_main_t * ngm = &nsh_gre_main; + + vec_validate_init_empty (ngm->dev_inst_by_real, hi->dev_instance, ~0); + + ngm->dev_inst_by_real [hi->dev_instance] = new_dev_instance; + + return 0; +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (nsh_gre_device_class,static) = { + .name = "NSH_GRE", + .format_device_name = format_nsh_gre_name, + .format_tx_trace = format_nsh_gre_encap_trace, + .tx_function = dummy_interface_tx, + .name_renumber = nsh_gre_name_renumber, +}; + +static uword dummy_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + return 0; +} + +static u8 * format_nsh_gre_header_with_length (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + s = format (s, "unimplemented dev %u", dev_instance); + return s; +} + +VNET_HW_INTERFACE_CLASS (nsh_gre_hw_class) = { + .name = "NSH_GRE", + .format_header = format_nsh_gre_header_with_length, + .set_rewrite = dummy_set_rewrite, +}; + +#define foreach_copy_field \ +_(src.as_u32) \ +_(dst.as_u32) \ +_(encap_fib_index) \ +_(decap_fib_index) \ +_(decap_next_index) \ +_(ver_o_c) \ +_(length) \ +_(md_type) \ +_(next_protocol) \ +_(spi_si) \ +_(c1) \ +_(c2) \ +_(c3) \ +_(c4) \ +_(tlvs) + +#define foreach_32bit_field \ +_(spi_si) \ 
+_(c1) \ +_(c2) \ +_(c3) \ +_(c4) + +static int nsh_gre_rewrite (nsh_gre_tunnel_t * t) +{ + u8 *rw = 0; + ip4_header_t * ip0; + nsh_header_t * nsh0; + ip4_gre_and_nsh_header_t * h0; + int len; + + len = sizeof (*h0) + vec_len(t->tlvs)*4; + + vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES); + + h0 = (ip4_gre_and_nsh_header_t *) rw; + + /* Fixed portion of the (outer) ip4 header */ + ip0 = &h0->ip4; + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_GRE; + /* we fix up the ip4 header length and checksum after-the-fact */ + ip0->src_address.as_u32 = t->src.as_u32; + ip0->dst_address.as_u32 = t->dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + + /* GRE header, zero execpt for the NSH ethertype */ + h0->gre.protocol = clib_host_to_net_u16(GRE_PROTOCOL_nsh); + + /* NSH header */ + nsh0 = &h0->nsh; + nsh0->ver_o_c = t->ver_o_c; + nsh0->md_type = t->md_type; + nsh0->next_protocol = t->next_protocol; + nsh0->spi_si = t->spi_si; + nsh0->c1 = t->c1; + nsh0->c2 = t->c2; + nsh0->c3 = t->c3; + nsh0->c4 = t->c4; + + /* Endian swap 32-bit fields */ +#define _(x) nsh0->x = clib_host_to_net_u32(nsh0->x); + foreach_32bit_field; +#undef _ + + /* fix nsh header length */ + t->length = 6 + vec_len(t->tlvs); + nsh0->length = t->length; + + /* Copy any TLVs */ + if (vec_len(t->tlvs)) + memcpy (nsh0->tlvs, t->tlvs, 4*vec_len(t->tlvs)); + + t->rewrite = rw; + return (0); +} + +int vnet_nsh_gre_add_del_tunnel (vnet_nsh_gre_add_del_tunnel_args_t *a, + u32 * sw_if_indexp) +{ + nsh_gre_main_t * ngm = &nsh_gre_main; + nsh_gre_tunnel_t *t = 0; + vnet_main_t * vnm = ngm->vnet_main; + vnet_hw_interface_t * hi; + uword * p; + u32 hw_if_index = ~0; + u32 sw_if_index = ~0; + int rv; + u64 key; + u32 spi_si_net_byte_order; + + spi_si_net_byte_order = clib_host_to_net_u32(a->spi_si); + + key = (((u64)(a->src.as_u32))<<32) | spi_si_net_byte_order; + + p = hash_get (ngm->nsh_gre_tunnel_by_src_address, key); + + if (a->is_add) + { + /* adding a tunnel: tunnel must not already exist */ + if (p) + return VNET_API_ERROR_INVALID_VALUE; + + if (a->decap_next_index >= NSH_INPUT_N_NEXT) + return VNET_API_ERROR_INVALID_DECAP_NEXT; + + pool_get_aligned (ngm->tunnels, t, CLIB_CACHE_LINE_BYTES); + memset (t, 0, sizeof (*t)); + + /* copy from arg structure */ +#define _(x) t->x = a->x; + foreach_copy_field; +#undef _ + + rv = nsh_gre_rewrite (t); + + if (rv) + { + pool_put (ngm->tunnels, t); + return rv; + } + + hash_set (ngm->nsh_gre_tunnel_by_src_address, key, t - ngm->tunnels); + + if (vec_len (ngm->free_nsh_gre_tunnel_hw_if_indices) > 0) + { + hw_if_index = ngm->free_nsh_gre_tunnel_hw_if_indices + [vec_len (ngm->free_nsh_gre_tunnel_hw_if_indices)-1]; + _vec_len (ngm->free_nsh_gre_tunnel_hw_if_indices) -= 1; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = t - ngm->tunnels; + hi->hw_instance = hi->dev_instance; + } + else + { + hw_if_index = vnet_register_interface + (vnm, nsh_gre_device_class.index, t - ngm->tunnels, + nsh_gre_hw_class.index, t - ngm->tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->output_node_index = nsh_gre_encap_node.index; + } + + t->hw_if_index = hw_if_index; + t->sw_if_index = sw_if_index = hi->sw_if_index; + + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + } + else + { + /* deleting a tunnel: tunnel must exist */ + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + t = pool_elt_at_index (ngm->tunnels, p[0]); + + vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */); 
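+      /* Park the hw interface on a free list rather than deleting it;
+       * a later tunnel add recycles it from free_nsh_gre_tunnel_hw_if_indices
+       * instead of registering a brand-new vnet interface. */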
+ vec_add1 (ngm->free_nsh_gre_tunnel_hw_if_indices, t->hw_if_index); + + hash_unset (ngm->nsh_gre_tunnel_by_src_address, key); + vec_free (t->rewrite); + pool_put (ngm->tunnels, t); + } + + if (sw_if_indexp) + *sw_if_indexp = sw_if_index; + + return 0; +} + +static u32 fib_index_from_fib_id (u32 fib_id) +{ + ip4_main_t * im = &ip4_main; + uword * p; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (!p) + return ~0; + + return p[0]; +} + +static uword unformat_decap_next (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 tmp; + + if (unformat (input, "drop")) + *result = NSH_INPUT_NEXT_DROP; + else if (unformat (input, "ip4")) + *result = NSH_INPUT_NEXT_IP4_INPUT; + else if (unformat (input, "ip6")) + *result = NSH_INPUT_NEXT_IP6_INPUT; + else if (unformat (input, "ethernet")) + *result = NSH_INPUT_NEXT_IP6_INPUT; + else if (unformat (input, "%d", &tmp)) + *result = tmp; + else + return 0; + return 1; +} + +static clib_error_t * +nsh_gre_add_del_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t src, dst; + u8 is_add = 1; + u8 src_set = 0; + u8 dst_set = 0; + u32 encap_fib_index = 0; + u32 decap_fib_index = 0; + u8 ver_o_c = 0; + u8 length = 0; + u8 md_type = 0; + u8 next_protocol = 1; /* ip4 */ + u32 spi; + u8 spi_set = 0; + u32 si; + u8 si_set = 0; + u32 spi_si; + u32 c1 = 0; + u32 c2 = 0; + u32 c3 = 0; + u32 c4 = 0; + u32 decap_next_index = 1; /* ip4_input */ + u32 *tlvs = 0; + u32 tmp; + int rv; + vnet_nsh_gre_add_del_tunnel_args_t _a, * a = &_a; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "src %U", + unformat_ip4_address, &src)) + src_set = 1; + else if (unformat (line_input, "dst %U", + unformat_ip4_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "encap-vrf-id %d", &tmp)) + { + encap_fib_index = fib_index_from_fib_id (tmp); + if (encap_fib_index == ~0) + return clib_error_return (0, "nonexistent encap fib id %d", tmp); + } + else if (unformat (line_input, "decap-vrf-id %d", &tmp)) + { + decap_fib_index = fib_index_from_fib_id (tmp); + if (decap_fib_index == ~0) + return clib_error_return (0, "nonexistent decap fib id %d", tmp); + } + else if (unformat (line_input, "decap-next %U", unformat_decap_next, + &decap_next_index)) + ; + else if (unformat (line_input, "version %d", &tmp)) + ver_o_c |= (tmp & 3) << 6; + else if (unformat (line_input, "o-bit %d", &tmp)) + ver_o_c |= (tmp & 1) << 5; + else if (unformat (line_input, "c-bit %d", &tmp)) + ver_o_c |= (tmp & 1) << 4; + else if (unformat (line_input, "md-type %d", &tmp)) + md_type = tmp; + else if (unformat(line_input, "next-ip4")) + next_protocol = 1; + else if (unformat(line_input, "next-ip6")) + next_protocol = 2; + else if (unformat(line_input, "next-ethernet")) + next_protocol = 3; + else if (unformat (line_input, "c1 %d", &c1)) + ; + else if (unformat (line_input, "c2 %d", &c2)) + ; + else if (unformat (line_input, "c3 %d", &c3)) + ; + else if (unformat (line_input, "c4 %d", &c4)) + ; + else if (unformat (line_input, "spi %d", &spi)) + spi_set = 1; + else if (unformat (line_input, "si %d", &si)) + si_set = 1; + else if (unformat (line_input, "tlv %x")) + vec_add1 (tlvs, tmp); + else + return clib_error_return (0, "parse error: 
'%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (src_set == 0) + return clib_error_return (0, "tunnel src address not specified"); + + if (dst_set == 0) + return clib_error_return (0, "tunnel dst address not specified"); + + if (spi_set == 0) + return clib_error_return (0, "spi not specified"); + + if (si_set == 0) + return clib_error_return (0, "si not specified"); + + spi_si = (spi<<8) | si; + + memset (a, 0, sizeof (*a)); + + a->is_add = is_add; + +#define _(x) a->x = x; + foreach_copy_field; +#undef _ + + rv = vnet_nsh_gre_add_del_tunnel (a, 0 /* hw_if_indexp */); + + switch(rv) + { + case 0: + break; + case VNET_API_ERROR_INVALID_DECAP_NEXT: + return clib_error_return (0, "invalid decap-next..."); + + case VNET_API_ERROR_TUNNEL_EXIST: + return clib_error_return (0, "tunnel already exists..."); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "session does not exist..."); + + default: + return clib_error_return + (0, "vnet_nsh_gre_add_del_tunnel returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (create_nsh_gre_tunnel_command, static) = { + .path = "nsh gre tunnel", + .short_help = + "nsh gre tunnel src <ip4-addr> dst <ip4-addr>" + " c1 <nn> c2 <nn> c3 <nn> c4 <nn> spi <nn> si <nn>\n" + " [encap-fib-id <nn>] [decap-fib-id <nn>] [o-bit <1|0>] [c-bit <1|0>]\n" + " [md-type <nn>][next-ip4][next-ip6][next-ethernet]\n" + " [tlv <xx>]\n", + .function = nsh_gre_add_del_tunnel_command_fn, +}; + +static clib_error_t * +show_nsh_gre_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + nsh_gre_main_t * ngm = &nsh_gre_main; + nsh_gre_tunnel_t * t; + + if (pool_elts (ngm->tunnels) == 0) + vlib_cli_output (vm, "No nsh-gre tunnels configured..."); + + pool_foreach (t, ngm->tunnels, + ({ + vlib_cli_output (vm, "%U", format_nsh_gre_tunnel, t); + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_nsh_gre_tunnel_command, static) = { + .path = "show nsh gre tunnel", + .function = show_nsh_gre_tunnel_command_fn, +}; + +clib_error_t *nsh_gre_init (vlib_main_t *vm) +{ + nsh_gre_main_t *ngm = &nsh_gre_main; + + ngm->vnet_main = vnet_get_main(); + ngm->vlib_main = vm; + + ngm->nsh_gre_tunnel_by_src_address = hash_create (0, sizeof (uword)); + gre_register_input_protocol (vm, GRE_PROTOCOL_nsh, + nsh_gre_input_node.index); + return 0; +} + +VLIB_INIT_FUNCTION(nsh_gre_init); + diff --git a/vnet/vnet/nsh-gre/nsh_gre.h b/vnet/vnet/nsh-gre/nsh_gre.h new file mode 100644 index 00000000000..c82c80f6a68 --- /dev/null +++ b/vnet/vnet/nsh-gre/nsh_gre.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_nsh_gre_h +#define included_vnet_nsh_gre_h + +#include <vnet/vnet.h> +#include <vnet/gre/gre.h> +#include <vnet/nsh-gre/nsh_gre_packet.h> +#include <vnet/ip/ip4_packet.h> + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; /* 20 bytes */ + gre_header_t gre; /* 4 bytes */ + nsh_header_t nsh; /* 28 bytes */ +}) ip4_gre_and_nsh_header_t; + +typedef struct { + /* Rewrite string. $$$$ embed vnet_rewrite header */ + u8 * rewrite; + + /* tunnel src and dst addresses */ + ip4_address_t src; + ip4_address_t dst; + + /* FIB indices */ + u32 encap_fib_index; /* tunnel partner lookup here */ + u32 decap_fib_index; /* inner IP lookup here */ + + /* when decapsulating, send pkts here */ + u32 decap_next_index; + + /* vnet intfc hw/sw_if_index */ + u32 hw_if_index; + u32 sw_if_index; + + /* NSH header fields in HOST byte order */ + u8 ver_o_c; + u8 length; + u8 md_type; + u8 next_protocol; + u32 spi_si; + + /* Context headers, always present, in HOST byte order */ + u32 c1, c2, c3, c4; + u32 * tlvs; +} nsh_gre_tunnel_t; + +#define foreach_nsh_gre_input_next \ + _ (DROP, "error-drop") \ + _ (IP4_INPUT, "ip4-input") \ + _ (IP6_INPUT, "ip6-input") \ + _ (ETHERNET_INPUT, "ethernet-input") + +typedef enum { +#define _(s,n) NSH_INPUT_NEXT_##s, + foreach_nsh_gre_input_next +#undef _ + NSH_INPUT_N_NEXT, +} nsh_gre_input_next_t; + +typedef enum { +#define nsh_gre_error(n,s) NSH_GRE_ERROR_##n, +#include <vnet/nsh-gre/nsh_gre_error.def> +#undef nsh_gre_error + NSH_GRE_N_ERROR, +} nsh_gre_input_error_t; + +typedef struct { + /* vector of encap tunnel instances */ + nsh_gre_tunnel_t *tunnels; + + /* lookup tunnel by tunnel partner src address */ + uword * nsh_gre_tunnel_by_src_address; + + /* Free vlib hw_if_indices */ + u32 * free_nsh_gre_tunnel_hw_if_indices; + + /* show device instance by real device instance */ + u32 * dev_inst_by_real; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} nsh_gre_main_t; + +nsh_gre_main_t nsh_gre_main; + +vlib_node_registration_t nsh_gre_input_node; +vlib_node_registration_t nsh_gre_encap_node; + +u8 * format_nsh_gre_encap_trace (u8 * s, va_list * args); + +typedef struct { + u8 is_add; + ip4_address_t src, dst; + u32 encap_fib_index; + u32 decap_fib_index; + u32 decap_next_index; + u8 ver_o_c; + u8 length; + u8 md_type; + u8 next_protocol; + u32 spi_si; + u32 c1, c2, c3, c4; + u32 * tlvs; +} vnet_nsh_gre_add_del_tunnel_args_t; + +int vnet_nsh_gre_add_del_tunnel (vnet_nsh_gre_add_del_tunnel_args_t *a, + u32 * sw_if_indexp); + +#endif /* included_vnet_nsh_gre_h */ diff --git a/vnet/vnet/nsh-gre/nsh_gre_error.def b/vnet/vnet/nsh-gre/nsh_gre_error.def new file mode 100644 index 00000000000..532b02a6e89 --- /dev/null +++ b/vnet/vnet/nsh-gre/nsh_gre_error.def @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +nsh_gre_error (DECAPSULATED, "good packets decapsulated") +nsh_gre_error (NO_SUCH_TUNNEL, "no such tunnel packets") +nsh_gre_error (INVALID_NEXT_PROTOCOL, "invalid next protocol") diff --git a/vnet/vnet/nsh-gre/nsh_gre_packet.h b/vnet/vnet/nsh-gre/nsh_gre_packet.h new file mode 100644 index 00000000000..0620f227b9f --- /dev/null +++ b/vnet/vnet/nsh-gre/nsh_gre_packet.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_nsh_gre_packet_h +#define included_vnet_nsh_gre_packet_h + +/* + * NSH_GRE packet format from draft-quinn-sfc-nsh-03.txt + * + * NSH Base Header + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Ver|O|C|R|R|R|R|R|R| Length | MD Type | Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * + * Base Header Field Descriptions: + * + * Version: The version field is used to ensure backward compatibility + * going forward with future NSH updates. + * + * O bit: Indicates that this packet is an operations and management + * (OAM) packet. SFF and SFs nodes MUST examine the payload and take + * appropriate action (e.g. return status information). + * + * OAM message specifics and handling details are outside the scope of + * this document. + * + * C bit: Indicates that a critical metadata TLV is present (see section + * 7). This bit acts as an indication for hardware implementers to + * decide how to handle the presence of a critical TLV without + * necessarily needing to parse all TLVs present. The C bit MUST be set + * to 1 if one or more critical TLVs are present. + * + * All other flag fields are reserved. + * + * Length: total length, in 4 byte words, of the NSH header, including + * optional variable TLVs. Length must be equal or greater than 6. + * + * MD Type: indicates the format of NSH beyond the base header and the + * type of metadata being carried. This typing is used to describe the + * use for the metadata. A new registry will be requested from IANA for + * the MD Type. NSH defines one type, type = 0x1 which indicates that + * the format of the header is as per this draft. + * + * The format of the base header is invariant, and not described by MD + * Type. + * + * Next Protocol: indicates the protocol type of the original packet. A + * new IANA registry will be created for protocol type. 
+ * + * This draft defines the following Next Protocol values: + * + * 0x1 : IPv4 + * 0x2 : IPv6 + * 0x3 : Ethernet + */ + +typedef CLIB_PACKED(struct { + u8 ver_o_c; + u8 length; + u8 md_type; + u8 next_protocol; + u32 spi_si; + /* Context headers, always present */ + u32 c1; u32 c2; u32 c3; u32 c4; + + /* Optional variable length metadata */ + u32 tlvs[0]; +}) nsh_header_t; + +#define NSH_GRE_VERSION (0<<6) +#define NSH_GRE_O_BIT (1<<5) +#define NSH_GRE_C_BIT (1<<4) + +/* Network byte order shift / mask */ +#define NSH_GRE_SINDEX_MASK 0xFF +#define NSH_GRE_SPI_MASK (0x00FFFFFF) +#define NSH_GRE_SPI_SHIFT 8 + +#endif /* included_vnet_nsh_gre_packet_h */ diff --git a/vnet/vnet/nsh-vxlan-gpe/decap.c b/vnet/vnet/nsh-vxlan-gpe/decap.c new file mode 100644 index 00000000000..62bb0f81dc7 --- /dev/null +++ b/vnet/vnet/nsh-vxlan-gpe/decap.c @@ -0,0 +1,365 @@ +/* + * nsh.c: nsh packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h> + +vlib_node_registration_t nsh_vxlan_gpe_input_node; + +/* From nsh-gre */ +u8 * format_nsh_header_with_length (u8 * s, va_list * args); + +typedef struct { + u32 next_index; + u32 tunnel_index; + u32 error; + nsh_header_t h; +} nsh_vxlan_gpe_rx_trace_t; + +static u8 * format_nsh_vxlan_gpe_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nsh_vxlan_gpe_rx_trace_t * t = va_arg (*args, nsh_vxlan_gpe_rx_trace_t *); + + if (t->tunnel_index != ~0) + { + s = format (s, "NSH-VXLAN: tunnel %d next %d error %d", t->tunnel_index, + t->next_index, t->error); + } + else + { + s = format (s, "NSH-VXLAN: no tunnel next %d error %d\n", t->next_index, + t->error); + } + s = format (s, "\n %U", format_nsh_header_with_length, &t->h, + (u32) sizeof (t->h) /* max size */); + return s; +} + +static uword +nsh_vxlan_gpe_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main; + u32 last_tunnel_index = ~0; + nsh_vxlan_gpe_tunnel_key_t last_key; + u32 pkts_decapsulated = 0; + + memset (&last_key, 0xff, sizeof (last_key)); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + ip4_vxlan_gpe_and_nsh_header_t * iuvn0, * iuvn1; + uword * p0, * p1; + u32 tunnel_index0, tunnel_index1; + nsh_vxlan_gpe_tunnel_t * t0, * t1; + nsh_vxlan_gpe_tunnel_key_t key0, key1; + u32 error0, error1; + + /* Prefetch next iteration. 
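+            * While buffers 0 and 1 are processed, the buffer headers and
+            * the first two cache lines of packet data for buffers 2 and 3
+            * are pulled in, so the next pass through the dual-packet loop
+            * does not stall on memory.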
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* udp leaves current_data pointing at the vxlan header */ + vlib_buffer_advance + (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + vlib_buffer_advance + (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + + iuvn0 = vlib_buffer_get_current (b0); + iuvn1 = vlib_buffer_get_current (b1); + + /* pop (ip, udp, vxlan, nsh) */ + vlib_buffer_advance (b0, sizeof (*iuvn0)); + vlib_buffer_advance (b1, sizeof (*iuvn1)); + + tunnel_index0 = ~0; + error0 = 0; + next0 = NSH_VXLAN_GPE_INPUT_NEXT_DROP; + + tunnel_index1 = ~0; + error1 = 0; + next1 = NSH_VXLAN_GPE_INPUT_NEXT_DROP; + + key0.src = iuvn0->ip4.src_address.as_u32; + key0.vni = iuvn0->vxlan.vni_res; + key0.spi_si = iuvn0->nsh.spi_si; + key0.pad = 0; + + if (PREDICT_FALSE ((key0.as_u64[0] != last_key.as_u64[0]) + || (key0.as_u64[1] != last_key.as_u64[1]))) + { + p0 = hash_get_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key0); + + if (p0 == 0) + { + error0 = NSH_VXLAN_GPE_ERROR_NO_SUCH_TUNNEL; + goto trace0; + } + + last_key.as_u64[0] = key0.as_u64[0]; + last_key.as_u64[1] = key0.as_u64[1]; + tunnel_index0 = last_tunnel_index = p0[0]; + } + else + tunnel_index0 = last_tunnel_index; + + t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0); + + next0 = t0->decap_next_index; + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b0); + + /* + * ip[46] lookup in the configured FIB + * nsh-vxlan-gpe-encap, here's the encap tunnel sw_if_index + */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index; + + trace0: + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_vxlan_gpe_rx_trace_t *tr + = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = tunnel_index0; + tr->h = iuvn0->nsh; + } + + key1.src = iuvn1->ip4.src_address.as_u32; + key1.vni = iuvn1->vxlan.vni_res; + key1.spi_si = iuvn1->nsh.spi_si; + key1.pad = 0; + + if (PREDICT_FALSE ((key1.as_u64[0] != last_key.as_u64[0]) + || (key1.as_u64[1] != last_key.as_u64[1]))) + { + p1 = hash_get_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key1); + + if (p1 == 0) + { + error1 = NSH_VXLAN_GPE_ERROR_NO_SUCH_TUNNEL; + goto trace1; + } + + last_key.as_u64[0] = key1.as_u64[0]; + last_key.as_u64[1] = key1.as_u64[1]; + tunnel_index1 = last_tunnel_index = p1[0]; + } + else + tunnel_index1 = last_tunnel_index; + + t1 = pool_elt_at_index (ngm->tunnels, tunnel_index1); + + next1 = t1->decap_next_index; + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b1); + + /* + * ip[46] lookup in the configured FIB + * nsh-vxlan-gpe-encap, here's the encap tunnel sw_if_index + */ + vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index; + pkts_decapsulated += 2; + + trace1: + b1->error = error1 ? 
node->errors[error1] : 0; + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_vxlan_gpe_rx_trace_t *tr + = vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->next_index = next1; + tr->error = error1; + tr->tunnel_index = tunnel_index1; + tr->h = iuvn1->nsh; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + ip4_vxlan_gpe_and_nsh_header_t * iuvn0; + uword * p0; + u32 tunnel_index0; + nsh_vxlan_gpe_tunnel_t * t0; + nsh_vxlan_gpe_tunnel_key_t key0; + u32 error0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* udp leaves current_data pointing at the vxlan header */ + vlib_buffer_advance + (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + + iuvn0 = vlib_buffer_get_current (b0); + + /* pop (ip, udp, vxlan, nsh) */ + vlib_buffer_advance (b0, sizeof (*iuvn0)); + + tunnel_index0 = ~0; + error0 = 0; + next0 = NSH_VXLAN_GPE_INPUT_NEXT_DROP; + + key0.src = iuvn0->ip4.src_address.as_u32; + key0.vni = iuvn0->vxlan.vni_res; + key0.spi_si = iuvn0->nsh.spi_si; + key0.pad = 0; + + if (PREDICT_FALSE ((key0.as_u64[0] != last_key.as_u64[0]) + || (key0.as_u64[1] != last_key.as_u64[1]))) + { + p0 = hash_get_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key0); + + if (p0 == 0) + { + error0 = NSH_VXLAN_GPE_ERROR_NO_SUCH_TUNNEL; + goto trace00; + } + + last_key.as_u64[0] = key0.as_u64[0]; + last_key.as_u64[1] = key0.as_u64[1]; + tunnel_index0 = last_tunnel_index = p0[0]; + } + else + tunnel_index0 = last_tunnel_index; + + t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0); + + next0 = t0->decap_next_index; + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b0); + + /* + * ip[46] lookup in the configured FIB + * nsh-vxlan-gpe-encap, here's the encap tunnel sw_if_index + */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index; + pkts_decapsulated ++; + + trace00: + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_vxlan_gpe_rx_trace_t *tr + = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = tunnel_index0; + tr->h = iuvn0->nsh; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, nsh_vxlan_gpe_input_node.index, + NSH_VXLAN_GPE_ERROR_DECAPSULATED, + pkts_decapsulated); + return from_frame->n_vectors; +} + +static char * nsh_vxlan_gpe_error_strings[] = { +#define nsh_vxlan_gpe_error(n,s) s, +#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def> +#undef nsh_vxlan_gpe_error +#undef _ +}; + +VLIB_REGISTER_NODE (nsh_vxlan_gpe_input_node) = { + .function = nsh_vxlan_gpe_input, + .name = "nsh-vxlan-gpe-input", + /* Takes a vector of packets. 
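+   * Next-node dispositions are generated from
+   * foreach_nsh_vxlan_gpe_input_next; each tunnel's decap_next_index
+   * selects among them at decap time.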
*/ + .vector_size = sizeof (u32), + + .n_errors = NSH_VXLAN_GPE_N_ERROR, + .error_strings = nsh_vxlan_gpe_error_strings, + + .n_next_nodes = NSH_VXLAN_GPE_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [NSH_VXLAN_GPE_INPUT_NEXT_##s] = n, + foreach_nsh_vxlan_gpe_input_next +#undef _ + }, + + .format_buffer = format_nsh_header_with_length, + .format_trace = format_nsh_vxlan_gpe_rx_trace, + // $$$$ .unformat_buffer = unformat_nsh_vxlan_gpe_header, +}; diff --git a/vnet/vnet/nsh-vxlan-gpe/encap.c b/vnet/vnet/nsh-vxlan-gpe/encap.c new file mode 100644 index 00000000000..0ccdf60c6aa --- /dev/null +++ b/vnet/vnet/nsh-vxlan-gpe/encap.c @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h> + +/* Statistics (not really errors) */ +#define foreach_nsh_vxlan_gpe_encap_error \ +_(ENCAPSULATED, "good packets encapsulated") + +static char * nsh_vxlan_gpe_encap_error_strings[] = { +#define _(sym,string) string, + foreach_nsh_vxlan_gpe_encap_error +#undef _ +}; + +typedef enum { +#define _(sym,str) NSH_VXLAN_GPE_ENCAP_ERROR_##sym, + foreach_nsh_vxlan_gpe_encap_error +#undef _ + NSH_VXLAN_GPE_ENCAP_N_ERROR, +} nsh_vxlan_gpe_encap_error_t; + +typedef enum { + NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP, + NSH_VXLAN_GPE_ENCAP_NEXT_DROP, + NSH_VXLAN_GPE_ENCAP_N_NEXT, +} nsh_vxlan_gpe_encap_next_t; + +typedef struct { + u32 tunnel_index; +} nsh_vxlan_gpe_encap_trace_t; + +u8 * format_nsh_vxlan_gpe_encap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nsh_vxlan_gpe_encap_trace_t * t + = va_arg (*args, nsh_vxlan_gpe_encap_trace_t *); + + s = format (s, "NSH-VXLAN-ENCAP: tunnel %d", t->tunnel_index); + return s; +} + +#define foreach_fixed_header_offset \ +_(0) _(1) _(2) _(3) _(4) _(5) _(6) + +static uword +nsh_vxlan_gpe_encap (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main; + vnet_main_t * vnm = ngm->vnet_main; + u32 pkts_encapsulated = 0; + u16 old_l0 = 0, old_l1 = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0 = NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP; + u32 next1 = NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP; + vnet_hw_interface_t * hi0, * hi1; + ip4_header_t * ip0, * ip1; + udp_header_t * udp0, * udp1; + u64 * copy_src0, * copy_dst0; + u64 * copy_src1, * copy_dst1; + u32 * copy_src_last0, * 
copy_dst_last0; + u32 * copy_src_last1, * copy_dst_last1; + nsh_vxlan_gpe_tunnel_t * t0, * t1; + u16 new_l0, new_l1; + ip_csum_t sum0, sum1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* 1-wide cache? */ + hi0 = vnet_get_sup_hw_interface + (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]); + hi1 = vnet_get_sup_hw_interface + (vnm, vnet_buffer(b1)->sw_if_index[VLIB_TX]); + + t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance); + t1 = pool_elt_at_index (ngm->tunnels, hi1->dev_instance); + + ASSERT(vec_len(t0->rewrite) >= 24); + ASSERT(vec_len(t1->rewrite) >= 24); + + /* Apply the rewrite string. $$$$ vnet_rewrite? */ + vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); + vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite)); + + ip0 = vlib_buffer_get_current(b0); + ip1 = vlib_buffer_get_current(b1); + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip0; + copy_src0 = (u64 *) t0->rewrite; + copy_dst1 = (u64 *) ip1; + copy_src1 = (u64 *) t1->rewrite; + + ASSERT (sizeof (ip4_vxlan_gpe_and_nsh_header_t) == 60); + + /* Copy first 56 octets 8-bytes at a time */ +#define _(offs) copy_dst0[offs] = copy_src0[offs]; + foreach_fixed_header_offset; +#undef _ +#define _(offs) copy_dst1[offs] = copy_src1[offs]; + foreach_fixed_header_offset; +#undef _ + + /* Last 4 octets. 
Hopefully gcc will be our friend */ + copy_dst_last0 = (u32 *)(©_dst0[7]); + copy_src_last0 = (u32 *)(©_src0[7]); + copy_dst_last1 = (u32 *)(©_dst1[7]); + copy_src_last1 = (u32 *)(©_src1[7]); + + copy_dst_last0[0] = copy_src_last0[0]; + copy_dst_last1[0] = copy_src_last1[0]; + + /* If there are TLVs to copy, do so */ + if (PREDICT_FALSE (_vec_len(t0->rewrite) > 64)) + memcpy (©_dst0[3], t0->rewrite + 64 , + _vec_len (t0->rewrite)-64); + + if (PREDICT_FALSE (_vec_len(t1->rewrite) > 64)) + memcpy (©_dst0[3], t1->rewrite + 64 , + _vec_len (t1->rewrite)-64); + + /* fix the <bleep>ing outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + sum1 = ip1->checksum; + /* old_l1 always 0, see the rewrite setup */ + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + ip1->length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *)(ip0+1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + udp1 = (udp_header_t *)(ip1+1); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) + - sizeof (*ip1)); + + udp0->length = new_l0; + udp1->length = new_l1; + + /* Reset to look up tunnel partner in the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_vxlan_gpe_encap_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->tunnel_index = t0 - ngm->tunnels; + } + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_vxlan_gpe_encap_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->tunnel_index = t1 - ngm->tunnels; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP; + vnet_hw_interface_t * hi0; + ip4_header_t * ip0; + udp_header_t * udp0; + u64 * copy_src0, * copy_dst0; + u32 * copy_src_last0, * copy_dst_last0; + nsh_vxlan_gpe_tunnel_t * t0; + u16 new_l0; + ip_csum_t sum0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* 1-wide cache? */ + hi0 = vnet_get_sup_hw_interface + (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]); + + t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance); + + ASSERT(vec_len(t0->rewrite) >= 24); + + /* Apply the rewrite string. $$$$ vnet_rewrite? */ + vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); + + ip0 = vlib_buffer_get_current(b0); + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip0; + copy_src0 = (u64 *) t0->rewrite; + + ASSERT (sizeof (ip4_vxlan_gpe_and_nsh_header_t) == 60); + + /* Copy first 56 octets 8-bytes at a time */ +#define _(offs) copy_dst0[offs] = copy_src0[offs]; + foreach_fixed_header_offset; +#undef _ + /* Last 4 octets. 
Hopefully gcc will be our friend */ + copy_dst_last0 = (u32 *)(©_dst0[7]); + copy_src_last0 = (u32 *)(©_src0[7]); + + copy_dst_last0[0] = copy_src_last0[0]; + + /* If there are TLVs to copy, do so */ + if (PREDICT_FALSE (_vec_len(t0->rewrite) > 64)) + memcpy (©_dst0[3], t0->rewrite + 64 , + _vec_len (t0->rewrite)-64); + + /* fix the <bleep>ing outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *)(ip0+1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + + udp0->length = new_l0; + + /* Reset to look up tunnel partner in the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; + pkts_encapsulated ++; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + nsh_vxlan_gpe_encap_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->tunnel_index = t0 - ngm->tunnels; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, node->node_index, + NSH_VXLAN_GPE_ENCAP_ERROR_ENCAPSULATED, + pkts_encapsulated); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (nsh_vxlan_gpe_encap_node) = { + .function = nsh_vxlan_gpe_encap, + .name = "nsh-vxlan-gpe-encap", + .vector_size = sizeof (u32), + .format_trace = format_nsh_vxlan_gpe_encap_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(nsh_vxlan_gpe_encap_error_strings), + .error_strings = nsh_vxlan_gpe_encap_error_strings, + + .n_next_nodes = NSH_VXLAN_GPE_ENCAP_N_NEXT, + + .next_nodes = { + [NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [NSH_VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c new file mode 100644 index 00000000000..8cc46d3d3eb --- /dev/null +++ b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c @@ -0,0 +1,562 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h> + +nsh_vxlan_gpe_main_t nsh_vxlan_gpe_main; + +static u8 * format_decap_next (u8 * s, va_list * args) +{ + u32 next_index = va_arg (*args, u32); + + switch (next_index) + { + case NSH_VXLAN_GPE_INPUT_NEXT_DROP: + return format (s, "drop"); + case NSH_VXLAN_GPE_INPUT_NEXT_IP4_INPUT: + return format (s, "ip4"); + case NSH_VXLAN_GPE_INPUT_NEXT_IP6_INPUT: + return format (s, "ip6"); + case NSH_VXLAN_GPE_INPUT_NEXT_NSH_VXLAN_GPE_ENCAP: + return format (s, "nsh-vxlan-gpe"); + default: + return format (s, "unknown %d", next_index); + } + return s; +} + +u8 * format_nsh_vxlan_gpe_tunnel (u8 * s, va_list * args) +{ + nsh_vxlan_gpe_tunnel_t * t = va_arg (*args, nsh_vxlan_gpe_tunnel_t *); + nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main; + + s = format (s, + "[%d] %U (src) %U (dst) fibs: encap %d, decap %d", + t - ngm->tunnels, + format_ip4_address, &t->src, + format_ip4_address, &t->dst, + t->encap_fib_index, + t->decap_fib_index); + s = format (s, " decap next %U\n", format_decap_next, t->decap_next_index); + s = format (s, " vxlan VNI %d ", t->vni); + s = format (s, "nsh ver %d ", (t->ver_o_c>>6)); + if (t->ver_o_c & NSH_GRE_O_BIT) + s = format (s, "O-set "); + + if (t->ver_o_c & NSH_GRE_C_BIT) + s = format (s, "C-set "); + + s = format (s, "len %d (%d bytes) md_type %d next_protocol %d\n", + t->length, t->length * 4, t->md_type, t->next_protocol); + + s = format (s, " service path %d service index %d\n", + (t->spi_si>>NSH_GRE_SPI_SHIFT) & NSH_GRE_SPI_MASK, + t->spi_si & NSH_GRE_SINDEX_MASK); + + s = format (s, " c1 %d c2 %d c3 %d c4 %d\n", + t->c1, t->c2, t->c3, t->c4); + + return s; +} + +static u8 * format_nsh_vxlan_gpe_name (u8 * s, va_list * args) +{ + nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main; + u32 i = va_arg (*args, u32); + u32 show_dev_instance = ~0; + + if (i < vec_len (ngm->dev_inst_by_real)) + show_dev_instance = ngm->dev_inst_by_real[i]; + + if (show_dev_instance != ~0) + i = show_dev_instance; + + return format (s, "nsh_vxlan_gpe_tunnel%d", i); +} + +static int nsh_vxlan_gpe_name_renumber (vnet_hw_interface_t * hi, + u32 new_dev_instance) +{ + nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main; + + vec_validate_init_empty (ngm->dev_inst_by_real, hi->dev_instance, ~0); + + ngm->dev_inst_by_real [hi->dev_instance] = new_dev_instance; + + return 0; +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (nsh_vxlan_gpe_device_class,static) = { + .name = "NSH_VXLAN_GPE", + .format_device_name = format_nsh_vxlan_gpe_name, + .format_tx_trace = format_nsh_vxlan_gpe_encap_trace, + .tx_function = dummy_interface_tx, + .name_renumber = nsh_vxlan_gpe_name_renumber, +}; + +static uword dummy_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + return 0; +} + +static u8 * format_nsh_vxlan_gpe_header_with_length (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + s = format (s, "unimplemented dev %u", dev_instance); + return s; +} + +VNET_HW_INTERFACE_CLASS (nsh_vxlan_gpe_hw_class) = { + .name = "NSH_VXLAN_GPE", + .format_header = format_nsh_vxlan_gpe_header_with_length, + .set_rewrite = dummy_set_rewrite, +}; + +#define foreach_copy_field \ +_(src.as_u32) \ +_(dst.as_u32) \ +_(vni) \ +_(encap_fib_index) \ +_(decap_fib_index) \ +_(decap_next_index) \ +_(ver_o_c) \ 
+_(length) \ +_(md_type) \ +_(next_protocol) \ +_(spi_si) \ +_(c1) \ +_(c2) \ +_(c3) \ +_(c4) \ +_(tlvs) + +#define foreach_32bit_field \ +_(spi_si) \ +_(c1) \ +_(c2) \ +_(c3) \ +_(c4) + +static int nsh_vxlan_gpe_rewrite (nsh_vxlan_gpe_tunnel_t * t) +{ + u8 *rw = 0; + ip4_header_t * ip0; + nsh_header_t * nsh0; + ip4_vxlan_gpe_and_nsh_header_t * h0; + int len; + + len = sizeof (*h0) + vec_len(t->tlvs)*4; + + vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES); + + h0 = (ip4_vxlan_gpe_and_nsh_header_t *) rw; + + /* Fixed portion of the (outer) ip4 header */ + ip0 = &h0->ip4; + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_UDP; + + /* we fix up the ip4 header length and checksum after-the-fact */ + ip0->src_address.as_u32 = t->src.as_u32; + ip0->dst_address.as_u32 = t->dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + + /* UDP header, randomize src port on something, maybe? */ + h0->udp.src_port = clib_host_to_net_u16 (4790); + h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gpe); + + /* VXLAN header. Are we having fun yet? */ + h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P; + h0->vxlan.ver_res = VXLAN_GPE_VERSION; + h0->vxlan.next_protocol = VXLAN_NEXT_PROTOCOL_NSH; + h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni<<8); + + /* NSH header */ + nsh0 = &h0->nsh; + nsh0->ver_o_c = t->ver_o_c; + nsh0->md_type = t->md_type; + nsh0->next_protocol = t->next_protocol; + nsh0->spi_si = t->spi_si; + nsh0->c1 = t->c1; + nsh0->c2 = t->c2; + nsh0->c3 = t->c3; + nsh0->c4 = t->c4; + + /* Endian swap 32-bit fields */ +#define _(x) nsh0->x = clib_host_to_net_u32(nsh0->x); + foreach_32bit_field; +#undef _ + + /* fix nsh header length */ + t->length = 6 + vec_len(t->tlvs); + nsh0->length = t->length; + + /* Copy any TLVs */ + if (vec_len(t->tlvs)) + memcpy (nsh0->tlvs, t->tlvs, 4*vec_len(t->tlvs)); + + t->rewrite = rw; + return (0); +} + +int vnet_nsh_vxlan_gpe_add_del_tunnel +(vnet_nsh_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp) +{ + nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main; + nsh_vxlan_gpe_tunnel_t *t = 0; + vnet_main_t * vnm = ngm->vnet_main; + vnet_hw_interface_t * hi; + uword * p; + u32 hw_if_index = ~0; + u32 sw_if_index = ~0; + int rv; + nsh_vxlan_gpe_tunnel_key_t key, *key_copy; + hash_pair_t *hp; + + key.src = a->dst.as_u32; /* decap src in key is encap dst in config */ + key.vni = clib_host_to_net_u32 (a->vni << 8); + key.spi_si = clib_host_to_net_u32(a->spi_si); + + p = hash_get_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key); + + if (a->is_add) + { + /* adding a tunnel: tunnel must not already exist */ + if (p) + return VNET_API_ERROR_INVALID_VALUE; + + if (a->decap_next_index >= NSH_VXLAN_GPE_INPUT_N_NEXT) + return VNET_API_ERROR_INVALID_DECAP_NEXT; + + pool_get_aligned (ngm->tunnels, t, CLIB_CACHE_LINE_BYTES); + memset (t, 0, sizeof (*t)); + + /* copy from arg structure */ +#define _(x) t->x = a->x; + foreach_copy_field; +#undef _ + + rv = nsh_vxlan_gpe_rewrite (t); + + if (rv) + { + pool_put (ngm->tunnels, t); + return rv; + } + + key_copy = clib_mem_alloc (sizeof (*key_copy)); + memcpy (key_copy, &key, sizeof (*key_copy)); + + hash_set_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, key_copy, + t - ngm->tunnels); + + if (vec_len (ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices) > 0) + { + hw_if_index = ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices + [vec_len (ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices)-1]; + _vec_len (ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices) -= 1; + + hi = vnet_get_hw_interface (vnm, 
hw_if_index); + hi->dev_instance = t - ngm->tunnels; + hi->hw_instance = hi->dev_instance; + } + else + { + hw_if_index = vnet_register_interface + (vnm, nsh_vxlan_gpe_device_class.index, t - ngm->tunnels, + nsh_vxlan_gpe_hw_class.index, t - ngm->tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->output_node_index = nsh_vxlan_gpe_encap_node.index; + } + + t->hw_if_index = hw_if_index; + t->sw_if_index = sw_if_index = hi->sw_if_index; + + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + } + else + { + /* deleting a tunnel: tunnel must exist */ + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + t = pool_elt_at_index (ngm->tunnels, p[0]); + + vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */); + vec_add1 (ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices, t->hw_if_index); + + hp = hash_get_pair (ngm->nsh_vxlan_gpe_tunnel_by_key, &key); + key_copy = (void *)(hp->key); + hash_unset_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key); + clib_mem_free (key_copy); + + vec_free (t->rewrite); + pool_put (ngm->tunnels, t); + } + + if (sw_if_indexp) + *sw_if_indexp = sw_if_index; + + return 0; +} + +static u32 fib_index_from_fib_id (u32 fib_id) +{ + ip4_main_t * im = &ip4_main; + uword * p; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (!p) + return ~0; + + return p[0]; +} + +static uword unformat_decap_next (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 tmp; + + if (unformat (input, "drop")) + *result = NSH_VXLAN_GPE_INPUT_NEXT_DROP; + else if (unformat (input, "ip4")) + *result = NSH_VXLAN_GPE_INPUT_NEXT_IP4_INPUT; + else if (unformat (input, "ip6")) + *result = NSH_VXLAN_GPE_INPUT_NEXT_IP6_INPUT; + else if (unformat (input, "ethernet")) + *result = NSH_VXLAN_GPE_INPUT_NEXT_IP6_INPUT; + else if (unformat (input, "nsh-vxlan-gpe")) + *result = NSH_VXLAN_GPE_INPUT_NEXT_NSH_VXLAN_GPE_ENCAP; + else if (unformat (input, "%d", &tmp)) + *result = tmp; + else + return 0; + return 1; +} + +static clib_error_t * +nsh_vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t src, dst; + u8 is_add = 1; + u8 src_set = 0; + u8 dst_set = 0; + u32 encap_fib_index = 0; + u32 decap_fib_index = 0; + u8 ver_o_c = 0; + u8 length = 0; + u8 md_type = 0; + u8 next_protocol = 1; /* default: ip4 */ + u32 decap_next_index = NSH_VXLAN_GPE_INPUT_NEXT_IP4_INPUT; + u32 spi; + u8 spi_set = 0; + u32 si; + u32 vni; + u8 vni_set = 0; + u8 si_set = 0; + u32 spi_si; + u32 c1 = 0; + u32 c2 = 0; + u32 c3 = 0; + u32 c4 = 0; + u32 *tlvs = 0; + u32 tmp; + int rv; + vnet_nsh_vxlan_gpe_add_del_tunnel_args_t _a, * a = &_a; + + /* Get a line of input. */ + if (! 
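
Besides the CLI handler below, the tunnel API can be exercised directly from C. A minimal sketch, assuming <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h> is included and using purely illustrative addresses, VNI and service-path values (none of these constants come from this commit):

static int
example_add_nsh_vxlan_gpe_tunnel (void)
{
  vnet_nsh_vxlan_gpe_add_del_tunnel_args_t a;
  u32 sw_if_index = ~0;

  memset (&a, 0, sizeof (a));
  a.is_add = 1;
  a.src.as_u32 = clib_host_to_net_u32 (0x0a0a0a01); /* 10.10.10.1 (example) */
  a.dst.as_u32 = clib_host_to_net_u32 (0x0a0a0a02); /* 10.10.10.2 (example) */
  a.vni = 11;
  a.decap_next_index = NSH_VXLAN_GPE_INPUT_NEXT_IP4_INPUT;
  a.next_protocol = 1;                /* inner payload: ip4 */
  a.md_type = 1;
  a.spi_si = (100 << 8) | 4;          /* service path 100, service index 4 */

  return vnet_nsh_vxlan_gpe_add_del_tunnel (&a, &sw_if_index);
}

On success the new tunnel's sw_if_index is returned through the second argument, mirroring what the CLI handler does.
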
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "src %U", + unformat_ip4_address, &src)) + src_set = 1; + else if (unformat (line_input, "dst %U", + unformat_ip4_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "encap-vrf-id %d", &tmp)) + { + encap_fib_index = fib_index_from_fib_id (tmp); + if (encap_fib_index == ~0) + return clib_error_return (0, "nonexistent encap fib id %d", tmp); + } + else if (unformat (line_input, "decap-vrf-id %d", &tmp)) + { + decap_fib_index = fib_index_from_fib_id (tmp); + if (decap_fib_index == ~0) + return clib_error_return (0, "nonexistent decap fib id %d", tmp); + } + else if (unformat (line_input, "decap-next %U", unformat_decap_next, + &decap_next_index)) + ; + else if (unformat (line_input, "vni %d", &vni)) + vni_set = 1; + else if (unformat (line_input, "version %d", &tmp)) + ver_o_c |= (tmp & 3) << 6; + else if (unformat (line_input, "o-bit %d", &tmp)) + ver_o_c |= (tmp & 1) << 5; + else if (unformat (line_input, "c-bit %d", &tmp)) + ver_o_c |= (tmp & 1) << 4; + else if (unformat (line_input, "md-type %d", &tmp)) + md_type = tmp; + else if (unformat(line_input, "next-ip4")) + next_protocol = 1; + else if (unformat(line_input, "next-ip6")) + next_protocol = 2; + else if (unformat(line_input, "next-ethernet")) + next_protocol = 3; + else if (unformat(line_input, "next-nsh")) + next_protocol = 4; + else if (unformat (line_input, "c1 %d", &c1)) + ; + else if (unformat (line_input, "c2 %d", &c2)) + ; + else if (unformat (line_input, "c3 %d", &c3)) + ; + else if (unformat (line_input, "c4 %d", &c4)) + ; + else if (unformat (line_input, "spi %d", &spi)) + spi_set = 1; + else if (unformat (line_input, "si %d", &si)) + si_set = 1; + else if (unformat (line_input, "tlv %x")) + vec_add1 (tlvs, tmp); + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (src_set == 0) + return clib_error_return (0, "tunnel src address not specified"); + + if (dst_set == 0) + return clib_error_return (0, "tunnel dst address not specified"); + + if (vni_set == 0) + return clib_error_return (0, "vni not specified"); + + if (spi_set == 0) + return clib_error_return (0, "spi not specified"); + + if (si_set == 0) + return clib_error_return (0, "si not specified"); + + spi_si = (spi<<8) | si; + + memset (a, 0, sizeof (*a)); + + a->is_add = is_add; + +#define _(x) a->x = x; + foreach_copy_field; +#undef _ + + rv = vnet_nsh_vxlan_gpe_add_del_tunnel (a, 0 /* hw_if_indexp */); + + switch(rv) + { + case 0: + break; + case VNET_API_ERROR_INVALID_DECAP_NEXT: + return clib_error_return (0, "invalid decap-next..."); + + case VNET_API_ERROR_TUNNEL_EXIST: + return clib_error_return (0, "tunnel already exists..."); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel does not exist..."); + + default: + return clib_error_return + (0, "vnet_nsh_vxlan_gpe_add_del_tunnel returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (create_nsh_vxlan_gpe_tunnel_command, static) = { + .path = "nsh vxlan tunnel", + .short_help = + "nsh vxlan tunnel src <ip4-addr> dst <ip4-addr>" + " c1 <nn> c2 <nn> c3 <nn> c4 <nn> spi <nn> si <nn> vni <nn>\n" + " [encap-fib-id <nn>] [decap-fib-id <nn>] [o-bit <1|0>] [c-bit <1|0>]\n" + " [md-type <nn>][next-ip4][next-ip6][next-ethernet][next-nsh]\n" + " [tlv <xx>][decap-next 
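
For reference, an illustrative invocation of the CLI parsed above (addresses and identifiers are example values; the keywords match the unformat clauses in this function), followed by the show command registered further down to verify the result:

    nsh vxlan tunnel src 10.10.10.1 dst 10.10.10.2 vni 11 spi 100 si 4 decap-next ip4
    show nsh vxlan tunnel
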
[ip4|ip6|ethernet|nsh-encap]][del]\n", + .function = nsh_vxlan_gpe_add_del_tunnel_command_fn, +}; + +static clib_error_t * +show_nsh_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main; + nsh_vxlan_gpe_tunnel_t * t; + + if (pool_elts (ngm->tunnels) == 0) + vlib_cli_output (vm, "No nsh-vxlan-gpe tunnels configured..."); + + pool_foreach (t, ngm->tunnels, + ({ + vlib_cli_output (vm, "%U", format_nsh_vxlan_gpe_tunnel, t); + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_nsh_vxlan_gpe_tunnel_command, static) = { + .path = "show nsh vxlan tunnel", + .function = show_nsh_vxlan_gpe_tunnel_command_fn, +}; + +clib_error_t *nsh_vxlan_gpe_init (vlib_main_t *vm) +{ + nsh_vxlan_gpe_main_t *ngm = &nsh_vxlan_gpe_main; + + ngm->vnet_main = vnet_get_main(); + ngm->vlib_main = vm; + + ngm->nsh_vxlan_gpe_tunnel_by_key + = hash_create_mem (0, sizeof(nsh_vxlan_gpe_tunnel_key_t), sizeof (uword)); + + udp_register_dst_port (vm, UDP_DST_PORT_vxlan_gpe, + nsh_vxlan_gpe_input_node.index, 1 /* is_ip4 */); + return 0; +} + +VLIB_INIT_FUNCTION(nsh_vxlan_gpe_init); + diff --git a/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h new file mode 100644 index 00000000000..953035a472b --- /dev/null +++ b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_nsh_vxlan_gpe_h +#define included_vnet_nsh_vxlan_gpe_h + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/l2/l2_input.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/gre/gre.h> +#include <vnet/nsh-gre/nsh_gre_packet.h> +#include <vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/udp.h> + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; /* 20 bytes */ + udp_header_t udp; /* 8 bytes */ + vxlan_gpe_header_t vxlan; /* 8 bytes */ + nsh_header_t nsh; /* 28 bytes */ +}) ip4_vxlan_gpe_and_nsh_header_t; + +typedef CLIB_PACKED(struct { + /* + * Key fields: ip src, vxlan vni, nsh spi_si + * all fields in NET byte order + */ + union { + struct { + u32 src; + u32 vni; /* shifted 8 bits */ + u32 spi_si; + u32 pad; + }; + u64 as_u64[2]; + }; +}) nsh_vxlan_gpe_tunnel_key_t; + +typedef struct { + /* Rewrite string. 
$$$$ embed vnet_rewrite header */ + u8 * rewrite; + + /* decap next index */ + u32 decap_next_index; + + /* tunnel src and dst addresses */ + ip4_address_t src; + ip4_address_t dst; + + /* FIB indices */ + u32 encap_fib_index; /* tunnel partner lookup here */ + u32 decap_fib_index; /* inner IP lookup here */ + + /* vxlan VNI in HOST byte order, shifted left 8 bits */ + u32 vni; + + /* vnet intfc hw/sw_if_index */ + u32 hw_if_index; + u32 sw_if_index; + + /* NSH header fields in HOST byte order */ + u8 ver_o_c; + u8 length; + u8 md_type; + u8 next_protocol; + u32 spi_si; + + /* Context headers, always present, in HOST byte order */ + u32 c1, c2, c3, c4; + u32 * tlvs; +} nsh_vxlan_gpe_tunnel_t; + +#define foreach_nsh_vxlan_gpe_input_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") \ +_(ETHERNET_INPUT, "ethernet-input") \ +_(NSH_VXLAN_GPE_ENCAP, "nsh-vxlan-gpe-encap") + +typedef enum { +#define _(s,n) NSH_VXLAN_GPE_INPUT_NEXT_##s, + foreach_nsh_vxlan_gpe_input_next +#undef _ + NSH_VXLAN_GPE_INPUT_N_NEXT, +} nsh_vxlan_gpe_input_next_t; + +typedef enum { +#define nsh_vxlan_gpe_error(n,s) NSH_VXLAN_GPE_ERROR_##n, +#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def> +#undef nsh_vxlan_gpe_error + NSH_VXLAN_GPE_N_ERROR, +} nsh_vxlan_gpe_input_error_t; + +typedef struct { + /* vector of encap tunnel instances */ + nsh_vxlan_gpe_tunnel_t *tunnels; + + /* lookup tunnel by key */ + uword * nsh_vxlan_gpe_tunnel_by_key; + + /* Free vlib hw_if_indices */ + u32 * free_nsh_vxlan_gpe_tunnel_hw_if_indices; + + /* show device instance by real device instance */ + u32 * dev_inst_by_real; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} nsh_vxlan_gpe_main_t; + +nsh_vxlan_gpe_main_t nsh_vxlan_gpe_main; + +vlib_node_registration_t nsh_vxlan_gpe_input_node; +vlib_node_registration_t nsh_vxlan_gpe_encap_node; + +u8 * format_nsh_vxlan_gpe_encap_trace (u8 * s, va_list * args); + +typedef struct { + u8 is_add; + ip4_address_t src, dst; + u32 encap_fib_index; + u32 decap_fib_index; + u32 decap_next_index; + u32 vni; + u8 ver_o_c; + u8 length; + u8 md_type; + u8 next_protocol; + u32 spi_si; + u32 c1, c2, c3, c4; + u32 * tlvs; +} vnet_nsh_vxlan_gpe_add_del_tunnel_args_t; + +int vnet_nsh_vxlan_gpe_add_del_tunnel +(vnet_nsh_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp); + +#endif /* included_vnet_nsh_vxlan_gpe_h */ diff --git a/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def new file mode 100644 index 00000000000..4ba64fe4dc5 --- /dev/null +++ b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
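
The error enum above is generated from nsh_vxlan_gpe_error.def with an X-macro include. By the usual vnet convention (see the osi-input node later in this diff), the same .def file is expanded a second time into a string table for the input node's error counters; a sketch of that pattern (the actual decap node elsewhere in this commit may name things differently):

static char * nsh_vxlan_gpe_error_strings[] = {
#define nsh_vxlan_gpe_error(n,s) s,
#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def>
#undef nsh_vxlan_gpe_error
};
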
+ */ +nsh_vxlan_gpe_error (DECAPSULATED, "good packets decapsulated") +nsh_vxlan_gpe_error (NO_SUCH_TUNNEL, "no such tunnel packets") diff --git a/vnet/vnet/nsh-vxlan-gpe/vxlan-gpe-rfc.txt b/vnet/vnet/nsh-vxlan-gpe/vxlan-gpe-rfc.txt new file mode 100644 index 00000000000..35cee50f573 --- /dev/null +++ b/vnet/vnet/nsh-vxlan-gpe/vxlan-gpe-rfc.txt @@ -0,0 +1,868 @@ +Network Working Group P. Quinn +Internet-Draft Cisco Systems, Inc. +Intended status: Experimental P. Agarwal +Expires: January 4, 2015 Broadcom + R. Fernando + L. Kreeger + D. Lewis + F. Maino + M. Smith + N. Yadav + Cisco Systems, Inc. + L. Yong + Huawei USA + X. Xu + Huawei Technologies + U. Elzur + Intel + P. Garg + Microsoft + July 3, 2014 + + + Generic Protocol Extension for VXLAN + draft-quinn-vxlan-gpe-03.txt + +Abstract + + This draft describes extending Virtual eXtensible Local Area Network + (VXLAN), via changes to the VXLAN header, with three new + capabilities: support for multi-protocol encapsulation, operations, + administration and management (OAM) signaling and explicit + versioning. + +Status of this Memo + + This Internet-Draft is submitted in full conformance with the + provisions of BCP 78 and BCP 79. + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF). Note that other groups may also distribute + working documents as Internet-Drafts. The list of current Internet- + Drafts is at http://datatracker.ietf.org/drafts/current/. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as "work in progress." + + + + +Quinn, et al. Expires January 4, 2015 [Page 1] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + + This Internet-Draft will expire on January 4, 2015. + +Copyright Notice + + Copyright (c) 2014 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 2] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 2. VXLAN Without Protocol Extension . . . . . . . . . . . . . . . 5 + 3. Generic Protocol Extension VXLAN (VXLAN-gpe) . . . . . . . . . 6 + 3.1. Multi Protocol Support . . . . . . . . . . . . . . . . . . 6 + 3.2. OAM Support . . . . . . . . . . . . . . . . . . . . . . . 7 + 3.3. Version Bits . . . . . . . . . . . . . . . . . . . . . . . 7 + 4. Backward Compatibility . . . . . . . . . . . . . . . . . . . . 8 + 4.1. VXLAN VTEP to VXLAN-gpe VTEP . . . . . . . . . . . . . . . 8 + 4.2. VXLAN-gpe VTEP to VXLAN VTEP . . . . . . . . . . . . . . . 8 + 4.3. VXLAN-gpe UDP Ports . . . . . . . . . . . . . . . . . . . 8 + 4.4. VXLAN-gpe and Encapsulated IP Header Fields . . . . . . . 
8 + 5. VXLAN-gpe Examples . . . . . . . . . . . . . . . . . . . . . . 9 + 6. Security Considerations . . . . . . . . . . . . . . . . . . . 11 + 7. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12 + 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 13 + 8.1. UDP Port . . . . . . . . . . . . . . . . . . . . . . . . . 13 + 8.2. VXLAN-gpe Next Protocol . . . . . . . . . . . . . . . . . 13 + 8.3. VXLAN-gpe Reserved Bits . . . . . . . . . . . . . . . . . 13 + 9. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14 + 9.1. Normative References . . . . . . . . . . . . . . . . . . . 14 + 9.2. Informative References . . . . . . . . . . . . . . . . . . 14 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15 + + + + + + + + + + + + + + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 3] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +1. Introduction + + Virtual eXtensible Local Area Network [VXLAN] defines an + encapsulation format that encapsulates Ethernet frames in an outer + UDP/IP transport. As data centers evolve, the need to carry other + protocols encapsulated in an IP packet is required, as well as the + need to provide increased visibility and diagnostic capabilities + within the overlay. The VXLAN header does not specify the protocol + being encapsulated and therefore is currently limited to + encapsulating only Ethernet frame payload, nor does it provide the + ability to define OAM protocols. Rather than defining yet another + encapsulation, VXLAN is extended to provide protocol typing and OAM + capabilities. + + This document describes extending VXLAN via the following changes: + + Next Protocol Bit (P bit): A reserved flag bit is allocated, and set + in the VXLAN-gpe header to indicate that a next protocol field is + present. + + OAM Flag Bit (O bit): A reserved flag bit is allocated, and set in + the VXLAN-gpe header, to indicate that the packet is an OAM + packet. + + Version: Two reserved bits are allocated, and set in the VXLAN-gpe + header, to indicate VXLAN-gpe protocol version. + + Next Protocol: A 8 bit next protocol field is present in the VXLAN- + gpe header. + + + + + + + + + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 4] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +2. VXLAN Without Protocol Extension + + As described in the introduction, the VXLAN header has no protocol + identifier that indicates the type of payload being carried by VXLAN. + Because of this, VXLAN is limited to an Ethernet payload. + Furthermore, the VXLAN header has no mechanism to signal OAM packets. + + The VXLAN header defines bits 0-7 as flags (some defined, some + reserved), the VXLAN network identifier (VNI) field and several + reserved bits. The flags provide flexibility to define how the + reserved bits can be used to change the definition of the VXLAN + header. + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |R|R|R|R|I|R|R|R| Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | VXLAN Network Identifier (VNI) | Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + Figure 1: VXLAN Header + + + + + + + + + + + + + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 5] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +3. 
Generic Protocol Extension VXLAN (VXLAN-gpe) + +3.1. Multi Protocol Support + + This draft defines the following two changes to the VXLAN header in + order to support multi-protocol encapsulation: + + P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit + MUST be set to 1 to indicate the presence of the 8 bit next + protocol field. + + P = 0 indicates that the payload MUST conform to VXLAN as defined + in [VXLAN]. + + Flag bit 5 was chosen as the P bit because this flag bit is + currently reserved in VXLAN. + + Next Protocol Field: The lower 8 bits of the first word are used to + carry a next protocol. This next protocol field contains the + protocol of the encapsulated payload packet. A new protocol + registry will be requested from IANA. + + This draft defines the following Next Protocol values: + + 0x1 : IPv4 + 0x2 : IPv6 + 0x3 : Ethernet + 0x4 : Network Service Header [NSH] + + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |R|R|R|R|I|P|R|R| Reserved |Next Protocol | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | VXLAN Network Identifier (VNI) | Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 2: VXLAN-gpe Next Protocol + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 6] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +3.2. OAM Support + + Flag bit 7 is defined as the O bit. When the O bit is set to 1, the + packet is an OAM packet and OAM processing MUST occur. The OAM + protocol details are out of scope for this document. As with the + P-bit, bit 7 is currently a reserved flag in VXLAN. + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |R|R|R|R|I|P|R|O| Reserved |Next Protocol | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | VXLAN Network Identifier (VNI) | Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 3: VXLAN-gpe OAM Bit + +3.3. Version Bits + + VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are + reserved in VXLAN. The version field is used to ensure backward + compatibility going forward with future VXLAN-gpe updates. + + The initial version for VXLAN-gpe is 0. + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | VXLAN Network Identifier (VNI) | Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + + + Figure 4: VXLAN-gpe Version Bits + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 7] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +4. Backward Compatibility + +4.1. VXLAN VTEP to VXLAN-gpe VTEP + + As per VXLAN, reserved bits 5 and 7, VXLAN-gpe P and O-bits + respectively must be set to zero. The remaining reserved bits must + be zero, including the VXLAN-gpe version field, bits 8 and 9. The + encapsulated payload MUST be Ethernet. + +4.2. VXLAN-gpe VTEP to VXLAN VTEP + + A VXLAN-gpe VTEP MUST NOT encapsulate non-Ethernet frames to a VXLAN + VTEP. 
When encapsulating Ethernet frames to a VXLAN VTEP, the VXLAN- + gpe VTEP will set the P bit to 0, the Next Protocol to 0 and use UDP + destination port 4789. A VXLAN-gpe VTEP MUST also set O = 0 and Ver + = 0 when encapsulating Ethernet frames to VXLAN VTEP. The receiving + VXLAN VTEP will threat this packet as a VXLAN packet. + + A method for determining the capabilities of a VXLAN VTEP (gpe or + non-gpe) is out of the scope of this draft. + +4.3. VXLAN-gpe UDP Ports + + VXLAN-gpe uses a new UDP destination port (to be assigned by IANA) + when sending traffic to VXLAN-gpe VTEPs. + +4.4. VXLAN-gpe and Encapsulated IP Header Fields + + When encapsulating and decapsulating IPv4 and IPv6 packets, certain + fields, such as IPv4 Time to Live (TTL) from the inner IP header need + to be considered. VXLAN-gpe IP encapsulation and decapsulation + utilizes the techniques described in [RFC6830], section 5.3. + + + + + + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 8] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +5. VXLAN-gpe Examples + + This section provides three examples of protocols encapsulated using + the Generic Protocol Extension for VXLAN described in this document. + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | VXLAN Network Identifier (VNI) | Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Original IPv4 Packet | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 5: IPv4 and VXLAN-gpe + + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv6 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | VXLAN Network Identifier (VNI) | Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Original IPv6 Packet | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 6: IPv6 and VXLAN-gpe + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 9] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |R|R|R|R|I|1|R|0|0|0| Reserved |NP = Ethernet | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | VXLAN Network Identifier (VNI) | Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Original Ethernet Frame | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + Figure 7: Ethernet and VXLAN-gpe + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 10] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +6. Security Considerations + + VXLAN's security is focused on issues around L2 encapsulation into + L3. With VXLAN-gpe, issues such as spoofing, flooding, and traffic + redirection are dependent on the particular protocol payload + encapsulated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Quinn, et al. 
Expires January 4, 2015 [Page 11] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +7. Acknowledgments + + A special thank you goes to Dino Farinacci for his guidance and + detailed review. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 12] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +8. IANA Considerations + +8.1. UDP Port + + A new UDP port will be requested from IANA. + +8.2. VXLAN-gpe Next Protocol + + IANA is requested to set up a registry of "Next Protocol". These are + 8-bit values. Next Protocol values 0, 1, 2, 3 and 4 are defined in + this draft. New values are assigned via Standards Action [RFC5226]. + + +---------------+-------------+---------------+ + | Next Protocol | Description | Reference | + +---------------+-------------+---------------+ + | 0 | Reserved | This document | + | | | | + | 1 | IPv4 | This document | + | | | | + | 2 | IPv6 | This document | + | | | | + | 3 | Ethernet | This document | + | | | | + | 4 | NSH | This document | + | | | | + | 5..253 | Unassigned | | + +---------------+-------------+---------------+ + + Table 1 + +8.3. VXLAN-gpe Reserved Bits + + There are ten bits at the beginning of the VXLAN-gpe header. New + bits are assigned via Standards Action [RFC5226]. + + Bits 0-3 - Reserved + Bit 4 - Instance ID (I bit) + Bit 5 - Next Protocol (P bit) + Bit 6 - Reserved + Bit 7 - OAM (O bit) + Bits 8-9 - Version + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 13] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +9. References + +9.1. Normative References + + [RFC0768] Postel, J., "User Datagram Protocol", STD 6, RFC 768, + August 1980. + + [RFC0791] Postel, J., "Internet Protocol", STD 5, RFC 791, + September 1981. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", BCP 26, RFC 5226, + May 2008. + +9.2. Informative References + + [NSH] Quinn, P. and et al. , "Network Service Header", 2014. + + [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700, + October 1994. + + [RFC6830] Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The + Locator/ID Separation Protocol (LISP)", RFC 6830, + January 2013. + + [VXLAN] Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger, + L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A + Framework for Overlaying Virtualized Layer 2 Networks over + Layer 3 Networks", 2013. + + + + + + + + + + + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 14] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + +Authors' Addresses + + Paul Quinn + Cisco Systems, Inc. + + Email: paulq@cisco.com + + + Puneet Agarwal + Broadcom + + Email: pagarwal@broadcom.com + + + Rex Fernando + Cisco Systems, Inc. + + Email: rex@cisco.com + + + Larry Kreeger + Cisco Systems, Inc. + + Email: kreeger@cisco.com + + + Darrel Lewis + Cisco Systems, Inc. + + Email: darlewis@cisco.com + + + Fabio Maino + Cisco Systems, Inc. + + Email: kreeger@cisco.com + + + Michael Smith + Cisco Systems, Inc. + + Email: michsmit@cisco.com + + + + + + + + + +Quinn, et al. Expires January 4, 2015 [Page 15] + +Internet-Draft Generic Protocol Extension for VXLAN July 2014 + + + Navindra Yadav + Cisco Systems, Inc. 
+ + Email: nyadav@cisco.com + + + Lucy Yong + Huawei USA + + Email: lucy.yong@huawei.com + + + Xiaohu Xu + Huawei Technologies + + Email: xuxiaohu@huawei.com + + + Uri Elzur + Intel + + Email: uri.elzur@intel.com + + + Pankaj Garg + Microsoft + + Email: Garg.Pankaj@microsoft.com diff --git a/vnet/vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h b/vnet/vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h new file mode 100644 index 00000000000..efc85c4bb54 --- /dev/null +++ b/vnet/vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vxlan_gpe_packet_h +#define included_vxlan_gpe_packet_h + +/* + * From draft-quinn-vxlan-gpe-03.txt + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * I Bit: Flag bit 4 indicates that the VNI is valid. + * + * P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit + * MUST be set to 1 to indicate the presence of the 8 bit next + * protocol field. + * + * O Bit: Flag bit 7 is defined as the O bit. When the O bit is set to 1, + * + * the packet is an OAM packet and OAM processing MUST occur. The OAM + * protocol details are out of scope for this document. As with the + * P-bit, bit 7 is currently a reserved flag in VXLAN. + * + * VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are + * reserved in VXLAN. The version field is used to ensure backward + * compatibility going forward with future VXLAN-gpe updates. + * + * The initial version for VXLAN-gpe is 0. + * + * This draft defines the following Next Protocol values: + * + * 0x1 : IPv4 + * 0x2 : IPv6 + * 0x3 : Ethernet + * 0x4 : Network Service Header [NSH] + */ + +typedef struct { + u8 flags; + u8 ver_res; + u8 res; + u8 next_protocol; + u32 vni_res; +} vxlan_gpe_header_t; + +#define VXLAN_GPE_FLAGS_I 0x08 +#define VXLAN_GPE_FLAGS_P 0x04 +#define VXLAN_GPE_FLAGS_O 0x01 + +#define VXLAN_GPE_VERSION 0x0 + +#define VXLAN_NEXT_PROTOCOL_IP4 0x1 +#define VXLAN_NEXT_PROTOCOL_IP6 0x2 +#define VXLAN_NEXT_PROTOCOL_ETHERNET 0x3 +#define VXLAN_NEXT_PROTOCOL_NSH 0x4 + +#endif /* included_vxlan_gpe_packet_h */ diff --git a/vnet/vnet/osi/node.c b/vnet/vnet/osi/node.c new file mode 100644 index 00000000000..723cf47f25a --- /dev/null +++ b/vnet/vnet/osi/node.c @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
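
The flag and next-protocol constants in vxlan_gpe_packet.h above reduce header validation to a couple of bit tests. A minimal sketch (an assumed helper, not part of this commit) that checks an incoming VXLAN-GPE header announces both a valid VNI and an NSH payload:

static inline int
vxlan_gpe_header_is_nsh (vxlan_gpe_header_t * h)
{
  /* I and P must both be set for the VNI and next-protocol fields
     to be meaningful. */
  if ((h->flags & (VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P))
      != (VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P))
    return 0;

  return h->next_protocol == VXLAN_NEXT_PROTOCOL_NSH;
}
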
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * osi_node.c: osi packet processing + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/osi/osi.h> +#include <vnet/ppp/ppp.h> +#include <vnet/hdlc/hdlc.h> +#include <vnet/llc/llc.h> + +#define foreach_osi_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") + +typedef enum { +#define _(s,n) OSI_INPUT_NEXT_##s, + foreach_osi_input_next +#undef _ + OSI_INPUT_N_NEXT, +} osi_input_next_t; + +typedef struct { + u8 packet_data[32]; +} osi_input_trace_t; + +static u8 * format_osi_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + osi_input_trace_t * t = va_arg (*va, osi_input_trace_t *); + + s = format (s, "%U", format_osi_header, t->packet_data); + + return s; +} + +static uword +osi_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + osi_main_t * lm = &osi_main; + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (osi_input_trace_t)); + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + osi_header_t * h0, * h1; + u8 next0, next1, enqueue_code; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * b2, * b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, LOAD); + vlib_prefetch_buffer_header (b3, LOAD); + + CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = (void *) (b0->data + b0->current_data); + h1 = (void *) (b1->data + b1->current_data); + + next0 = lm->input_next_by_protocol[h0->protocol]; + next1 = lm->input_next_by_protocol[h1->protocol]; + + b0->error = node->errors[next0 == OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL : OSI_ERROR_NONE]; + b1->error = node->errors[next1 == OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL : OSI_ERROR_NONE]; + + enqueue_code = (next0 != next_index) + 2*(next1 != next_index); + + if (PREDICT_FALSE (enqueue_code != 0)) + { + switch (enqueue_code) + { + case 1: + /* A B A */ + to_next[-2] = bi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, bi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, bi1); + break; + + case 3: + /* A B B or A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, bi0); + vlib_set_next_frame_buffer (vm, node, next1, bi1); + if (next0 == next1) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + next_index = next1; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + osi_header_t * h0; + u8 next0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = (void *) (b0->data + b0->current_data); + + next0 = lm->input_next_by_protocol[h0->protocol]; + + b0->error = node->errors[next0 == OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL : OSI_ERROR_NONE]; + + /* Sent packet to wrong next? */ + if (PREDICT_FALSE (next0 != next_index)) + { + /* Return old frame; remove incorrectly enqueued packet. */ + vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); + + /* Send to correct next. */ + next_index = next0; + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static char * osi_error_strings[] = { +#define _(f,s) s, + foreach_osi_error +#undef _ +}; + +VLIB_REGISTER_NODE (osi_input_node) = { + .function = osi_input, + .name = "osi-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = OSI_N_ERROR, + .error_strings = osi_error_strings, + + .n_next_nodes = OSI_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [OSI_INPUT_NEXT_##s] = n, + foreach_osi_input_next +#undef _ + }, + + .format_buffer = format_osi_header_with_length, + .format_trace = format_osi_input_trace, + .unformat_buffer = unformat_osi_header, +}; + +static clib_error_t * osi_input_init (vlib_main_t * vm) +{ + clib_error_t * error = 0; + osi_main_t * lm = &osi_main; + + if ((error = vlib_call_init_function (vm, osi_init))) + return error; + + osi_setup_node (vm, osi_input_node.index); + + { + int i; + for (i = 0; i < ARRAY_LEN (lm->input_next_by_protocol); i++) + lm->input_next_by_protocol[i] = OSI_INPUT_NEXT_DROP; + } + + ppp_register_input_protocol (vm, PPP_PROTOCOL_osi, osi_input_node.index); + hdlc_register_input_protocol (vm, HDLC_PROTOCOL_osi, osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer1, osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer2, osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer3, osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer4, osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer5, osi_input_node.index); + + return 0; +} + +VLIB_INIT_FUNCTION (osi_input_init); + +void +osi_register_input_protocol (osi_protocol_t protocol, + u32 node_index) +{ + osi_main_t * lm = &osi_main; + vlib_main_t * vm = lm->vlib_main; + osi_protocol_info_t * pi; + + { + clib_error_t * error = vlib_call_init_function (vm, osi_input_init); + if (error) + clib_error_report (error); + } + + pi = osi_get_protocol_info (lm, protocol); + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + osi_input_node.index, + node_index); + + lm->input_next_by_protocol[protocol] = pi->next_index; +} diff --git a/vnet/vnet/osi/osi.c b/vnet/vnet/osi/osi.c new file mode 100644 index 00000000000..7313dab1937 --- /dev/null +++ b/vnet/vnet/osi/osi.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * osi.c: osi support + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
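
osi_register_input_protocol above is the hook a dissector uses to claim an OSI protocol value and have osi-input steer matching packets to it. A hypothetical registration, typically done from the claiming node's init function (esis_input_node is an illustrative name, not defined in this commit):

  osi_register_input_protocol (OSI_PROTOCOL_esis, esis_input_node.index);
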
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/osi/osi.h> + +/* Global main structure. */ +osi_main_t osi_main; + +u8 * format_osi_protocol (u8 * s, va_list * args) +{ + osi_protocol_t p = va_arg (*args, u32); + osi_main_t * pm = &osi_main; + osi_protocol_info_t * pi = osi_get_protocol_info (pm, p); + + if (pi) + s = format (s, "%s", pi->name); + else + s = format (s, "0x%02x", p); + + return s; +} + +u8 * format_osi_header_with_length (u8 * s, va_list * args) +{ + osi_main_t * pm = &osi_main; + osi_header_t * h = va_arg (*args, osi_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + osi_protocol_t p = h->protocol; + uword indent, header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "osi header truncated"); + + indent = format_get_indent (s); + + s = format (s, "OSI %U", format_osi_protocol, p); + + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + { + osi_protocol_info_t * pi = osi_get_protocol_info (pm, p); + vlib_node_t * node = vlib_get_node (pm->vlib_main, pi->node_index); + if (node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) (h + 1), + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * format_osi_header (u8 * s, va_list * args) +{ + osi_header_t * h = va_arg (*args, osi_header_t *); + return format (s, "%U", format_osi_header_with_length, h, 0); +} + +/* Returns osi protocol as an int in host byte order. */ +uword +unformat_osi_protocol (unformat_input_t * input, va_list * args) +{ + u8 * result = va_arg (*args, u8 *); + osi_main_t * pm = &osi_main; + int p, i; + + /* Numeric type. */ + if (unformat (input, "0x%x", &p) + || unformat (input, "%d", &p)) + { + if (p >= (1 << 8)) + return 0; + *result = p; + return 1; + } + + /* Named type. */ + if (unformat_user (input, unformat_vlib_number_by_name, + pm->protocol_info_by_name, &i)) + { + osi_protocol_info_t * pi = vec_elt_at_index (pm->protocol_infos, i); + *result = pi->protocol; + return 1; + } + + return 0; +} + +uword +unformat_osi_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + osi_header_t _h, * h = &_h; + u8 p; + + if (! unformat (input, "%U", unformat_osi_protocol, &p)) + return 0; + + h->protocol = p; + + /* Add header to result. 
*/ + { + void * p; + u32 n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, n_bytes); + memcpy (p, h, n_bytes); + } + + return 1; +} + +static void add_protocol (osi_main_t * pm, + osi_protocol_t protocol, + char * protocol_name) +{ + osi_protocol_info_t * pi; + u32 i; + + vec_add2 (pm->protocol_infos, pi, 1); + i = pi - pm->protocol_infos; + + pi->name = protocol_name; + pi->protocol = protocol; + pi->next_index = pi->node_index = ~0; + + hash_set (pm->protocol_info_by_protocol, protocol, i); + hash_set_mem (pm->protocol_info_by_name, pi->name, i); +} + +static clib_error_t * osi_init (vlib_main_t * vm) +{ + osi_main_t * pm = &osi_main; + + memset (pm, 0, sizeof (pm[0])); + pm->vlib_main = vm; + + pm->protocol_info_by_name = hash_create_string (0, sizeof (uword)); + pm->protocol_info_by_protocol = hash_create (0, sizeof (uword)); + +#define _(f,n) add_protocol (pm, OSI_PROTOCOL_##f, #f); + foreach_osi_protocol; +#undef _ + + return vlib_call_init_function (vm, osi_input_init); +} + +VLIB_INIT_FUNCTION (osi_init); + diff --git a/vnet/vnet/osi/osi.h b/vnet/vnet/osi/osi.h new file mode 100644 index 00000000000..e213b1ba327 --- /dev/null +++ b/vnet/vnet/osi/osi.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * osi.h: OSI definitions + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_osi_h +#define included_osi_h + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> + +#define foreach_osi_protocol \ + _ (null, 0x0) \ + _ (x_29, 0x01) \ + _ (x_633, 0x03) \ + _ (q_931, 0x08) \ + _ (q_933, 0x08) \ + _ (q_2931, 0x09) \ + _ (q_2119, 0x0c) \ + _ (snap, 0x80) \ + _ (clnp, 0x81) \ + _ (esis, 0x82) \ + _ (isis, 0x83) \ + _ (idrp, 0x85) \ + _ (x25_esis, 0x8a) \ + _ (iso10030, 0x8c) \ + _ (iso11577, 0x8d) \ + _ (ip6, 0x8e) \ + _ (compressed, 0xb0) \ + _ (sndcf, 0xc1) \ + _ (ip4, 0xcc) \ + _ (ppp, 0xcf) + +typedef enum { +#define _(f,n) OSI_PROTOCOL_##f = n, + foreach_osi_protocol +#undef _ +} osi_protocol_t; + +typedef struct { + u8 protocol; + + u8 payload[0]; +} osi_header_t; + +typedef struct { + /* Name (a c string). */ + char * name; + + /* OSI protocol (SAP type). */ + osi_protocol_t protocol; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} osi_protocol_info_t; + +#define foreach_osi_error \ + _ (NONE, "no error") \ + _ (UNKNOWN_PROTOCOL, "unknown osi protocol") + +typedef enum { +#define _(f,s) OSI_ERROR_##f, + foreach_osi_error +#undef _ + OSI_N_ERROR, +} osi_error_t; + +typedef struct { + vlib_main_t * vlib_main; + + osi_protocol_info_t * protocol_infos; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword * protocol_info_by_name, * protocol_info_by_protocol; + + /* osi-input next index indexed by protocol. */ + u8 input_next_by_protocol[256]; +} osi_main_t; + +always_inline osi_protocol_info_t * +osi_get_protocol_info (osi_main_t * m, osi_protocol_t protocol) +{ + uword * p = hash_get (m->protocol_info_by_protocol, protocol); + return p ? vec_elt_at_index (m->protocol_infos, p[0]) : 0; +} + +extern osi_main_t osi_main; + +/* Register given node index to take input for given osi type. */ +void +osi_register_input_protocol (osi_protocol_t protocol, + u32 node_index); + +void osi_set_adjacency (vnet_rewrite_header_t * rw, + uword max_data_bytes, + osi_protocol_t protocol); + +format_function_t format_osi_protocol; +format_function_t format_osi_header; +format_function_t format_osi_header_with_length; + +/* Parse osi protocol as 0xXXXX or protocol name. */ +unformat_function_t unformat_osi_protocol; + +/* Parse osi header. */ +unformat_function_t unformat_osi_header; +unformat_function_t unformat_pg_osi_header; + +always_inline void +osi_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + + n->format_buffer = format_osi_header_with_length; + n->unformat_buffer = unformat_osi_header; + pn->unformat_edit = unformat_pg_osi_header; +} + +void +osi_register_input_protocol (osi_protocol_t protocol, + u32 node_index); + +format_function_t format_osi_header; + +#endif /* included_osi_h */ diff --git a/vnet/vnet/osi/pg.c b/vnet/vnet/osi/pg.c new file mode 100644 index 00000000000..c3eb1a6ed66 --- /dev/null +++ b/vnet/vnet/osi/pg.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
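
osi_get_protocol_info above maps a protocol byte to its registered metadata (name, handling node, next index). A minimal usage sketch, assuming <vnet/osi/osi.h> is included; the clib_warning output is purely illustrative:

  osi_main_t * om = &osi_main;
  osi_protocol_info_t * pi = osi_get_protocol_info (om, OSI_PROTOCOL_isis);

  if (pi)
    clib_warning ("OSI protocol 0x%x (%s) handled by node %d",
                  pi->protocol, pi->name, pi->node_index);
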
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * osi_pg.c: packet generator osi interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/osi/osi.h> + +typedef struct { + pg_edit_t protocol; +} pg_osi_header_t; + +static inline void +pg_osi_header_init (pg_osi_header_t * e) +{ + pg_edit_init (&e->protocol, osi_header_t, protocol); +} + +uword +unformat_pg_osi_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_osi_header_t * h; + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (osi_header_t), + &group_index); + pg_osi_header_init (h); + + error = 1; + if (! unformat (input, "%U", + unformat_pg_edit, + unformat_osi_protocol, &h->protocol)) + goto done; + + { + osi_main_t * pm = &osi_main; + osi_protocol_info_t * pi = 0; + pg_node_t * pg_node = 0; + + if (h->protocol.type == PG_EDIT_FIXED) + { + u8 t = *h->protocol.values[PG_EDIT_LO]; + pi = osi_get_protocol_info (pm, t); + if (pi && pi->node_index != ~0) + pg_node = pg_get_node (pi->node_index); + } + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto done; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/pg/cli.c b/vnet/vnet/pg/cli.c new file mode 100644 index 00000000000..495eac2d0b1 --- /dev/null +++ b/vnet/vnet/pg/cli.c @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * pg_cli.c: packet generator cli + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> + +#ifdef CLIB_UNIX +#include <vnet/unix/pcap.h> +#endif + +/* Root of all packet generator cli commands. */ +VLIB_CLI_COMMAND (vlib_cli_pg_command, static) = { + .path = "packet-generator", + .short_help = "Packet generator commands", +}; + +static clib_error_t * +enable_disable_stream (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + pg_main_t * pg = &pg_main; + pg_stream_t * s; + int is_enable = cmd->function_arg != 0; + u32 stream_index = ~0; + + if (unformat (input, "%U", unformat_eof)) + ; + else if (unformat (input, "%U", unformat_hash_vec_string, + pg->stream_index_by_name, &stream_index)) + ; + else + return clib_error_create ("unknown input `%U'", + format_unformat_error, input); + + /* No stream specified: enable/disable all streams. */ + if (stream_index == ~0) + pool_foreach (s, pg->streams, ({ + pg_stream_enable_disable (pg, s, is_enable); + })); + else + { + /* enable/disable specified stream. */ + s = pool_elt_at_index (pg->streams, stream_index); + pg_stream_enable_disable (pg, s, is_enable); + } + + return 0; +} + +VLIB_CLI_COMMAND (enable_streams_cli, static) = { + .path = "packet-generator enable-stream", + .short_help = "Enable packet generator streams", + .function = enable_disable_stream, + .function_arg = 1, /* is_enable */ +}; + +VLIB_CLI_COMMAND (disable_streams_cli, static) = { + .path = "packet-generator disable-stream", + .short_help = "Disable packet generator streams", + .function = enable_disable_stream, + .function_arg = 0, /* is_enable */ +}; + +static u8 * format_pg_stream (u8 * s, va_list * va) +{ + pg_stream_t * t = va_arg (*va, pg_stream_t *); + u8 * v; + + if (! t) + return format (s, "%=16s%=12s%=16s%s", + "Name", "Enabled", "Count", "Parameters"); + + s = format (s, "%-16v%=12s%16Ld", + t->name, + pg_stream_is_enabled (t) ? "Yes" : "No", + t->n_packets_generated); + + v = 0; + + v = format (v, "limit %Ld, ", t->n_packets_limit); + + v = format (v, "rate %.2e pps, ", t->rate_packets_per_second); + + v = format (v, "size %d%c%d, ", + t->min_packet_bytes, + t->packet_size_edit_type == PG_EDIT_RANDOM ? 
'+' : '-', + t->max_packet_bytes); + + v = format (v, "buffer-size %d, ", t->buffer_bytes); + + if (v) + { + s = format (s, " %v", v); + vec_free (v); + } + + return s; +} + +static clib_error_t * +show_streams (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + pg_main_t * pg = &pg_main; + pg_stream_t * s; + + if (pool_elts (pg->streams) == 0) + { + vlib_cli_output (vm, "no streams currently defined"); + goto done; + } + + vlib_cli_output (vm, "%U", format_pg_stream, 0); + pool_foreach (s, pg->streams, ({ + vlib_cli_output (vm, "%U", format_pg_stream, s); + })); + + done: + return 0; +} + +VLIB_CLI_COMMAND (show_streams_cli, static) = { + .path = "show packet-generator", + .short_help = "Show packet generator streams", + .function = show_streams, +}; + +static clib_error_t * +pg_pcap_read (pg_stream_t * s, char * file_name) +{ +#ifndef CLIB_UNIX + return clib_error_return (0, "no pcap support"); +#else + pcap_main_t pm; + clib_error_t * error; + memset (&pm, 0, sizeof (pm)); + pm.file_name = file_name; + error = pcap_read (&pm); + s->replay_packet_templates = pm.packets_read; + s->min_packet_bytes = pm.min_packet_bytes; + s->max_packet_bytes = pm.max_packet_bytes; + s->buffer_bytes = pm.max_packet_bytes; + /* For PCAP buffers we never re-use buffers. */ + s->flags |= PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE; + return error; +#endif /* CLIB_UNIX */ +} + +static uword +unformat_pg_stream_parameter (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + f64 x; + + if (unformat (input, "limit %f", &x)) + s->n_packets_limit = x; + + else if (unformat (input, "rate %f", &x)) + s->rate_packets_per_second = x; + + else if (unformat (input, "size %d-%d", &s->min_packet_bytes, + &s->max_packet_bytes)) + s->packet_size_edit_type = PG_EDIT_INCREMENT; + + else if (unformat (input, "size %d+%d", &s->min_packet_bytes, + &s->max_packet_bytes)) + s->packet_size_edit_type = PG_EDIT_RANDOM; + + else if (unformat (input, "buffer-size %d", &s->buffer_bytes)) + ; + + else + return 0; + + return 1; +} + +static clib_error_t * +validate_stream (pg_stream_t * s) +{ + if (s->max_packet_bytes < s->min_packet_bytes) + return clib_error_create ("max-size < min-size"); + + if (s->buffer_bytes >= 4096 || s->buffer_bytes == 0) + return clib_error_create ("buffer-size must be positive and < 4096, given %d", + s->buffer_bytes); + + if (s->rate_packets_per_second < 0) + return clib_error_create ("negative rate"); + + return 0; +} + +static clib_error_t * +new_stream (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + u8 * tmp = 0; + u32 hw_if_index; + unformat_input_t sub_input = {0}; + int sub_input_given = 0; + vnet_main_t * vnm = vnet_get_main(); + pg_main_t * pg = &pg_main; + pg_stream_t s = {0}; + char * pcap_file_name; + + s.sw_if_index[VLIB_RX] = s.sw_if_index[VLIB_TX] = ~0; + s.node_index = ~0; + s.max_packet_bytes = s.min_packet_bytes = 64; + s.buffer_bytes = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + pcap_file_name = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "name %v", &tmp)) + { + if (s.name) + vec_free (s.name); + s.name = tmp; + } + + else if (unformat (input, "node %U", + unformat_vnet_hw_interface, vnm, &hw_if_index)) + { + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + + s.node_index = hi->output_node_index; + s.sw_if_index[VLIB_TX] = hi->sw_if_index; + } + + else if (unformat (input, "node %U", + 
unformat_vlib_node, vm, &s.node_index)) + ; + + else if (unformat (input, "interface %U", + unformat_vnet_sw_interface, vnm, &s.sw_if_index[VLIB_RX])) + ; + + else if (unformat (input, "pcap %s", &pcap_file_name)) + ; + + else if (! sub_input_given + && unformat (input, "data %U", unformat_input, &sub_input)) + sub_input_given++; + + else if (unformat_user (input, unformat_pg_stream_parameter, &s)) + ; + + else if (unformat (input, "no-recycle")) + s.flags |= PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE; + + else + { + error = clib_error_create ("unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + error = validate_stream (&s); + if (error) + return error; + + if (! sub_input_given && ! pcap_file_name) + { + error = clib_error_create ("no packet data given"); + goto done; + } + + if (s.node_index == ~0) + { + error = clib_error_create ("output interface or node not given"); + goto done; + } + + { + pg_node_t * n; + + if (s.node_index < vec_len (pg->nodes)) + n = pg->nodes + s.node_index; + else + n = 0; + + if (pcap_file_name != 0) + { + error = pg_pcap_read (&s, pcap_file_name); + if (error) + goto done; + vec_free (pcap_file_name); + } + + else if (n && n->unformat_edit + && unformat_user (&sub_input, n->unformat_edit, &s)) + ; + + else if (! unformat_user (&sub_input, unformat_pg_payload, &s)) + { + error = clib_error_create + ("failed to parse packet data from `%U'", + format_unformat_error, &sub_input); + goto done; + } + } + + pg_stream_add (pg, &s); + return 0; + + done: + pg_stream_free (&s); + unformat_free (&sub_input); + return error; +} + +VLIB_CLI_COMMAND (new_stream_cli, static) = { + .path = "packet-generator new", + .function = new_stream, + .short_help = "Create packet generator stream", + .long_help = + "Create packet generator stream\n" + "\n" + "Arguments:\n" + "\n" + "name STRING sets stream name\n" + "interface STRING interface for stream output \n" + "node NODE-NAME node for stream output\n" + "data STRING specifies packet data\n", +}; + +static clib_error_t * +del_stream (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + pg_main_t * pg = &pg_main; + u32 i; + + if (! unformat (input, "%U", + &unformat_hash_vec_string, pg->stream_index_by_name, &i)) + return clib_error_create ("expected stream name `%U'", + format_unformat_error, input); + + pg_stream_del (pg, i); + return 0; +} + +VLIB_CLI_COMMAND (del_stream_cli, static) = { + .path = "packet-generator delete", + .function = del_stream, + .short_help = "Delete stream with given name", +}; + +static clib_error_t * +change_stream_parameters (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + pg_main_t * pg = &pg_main; + pg_stream_t * s, s_new; + u32 stream_index = ~0; + clib_error_t * error; + + if (unformat (input, "%U", unformat_hash_vec_string, + pg->stream_index_by_name, &stream_index)) + ; + else + return clib_error_create ("expecting stream name; got `%U'", + format_unformat_error, input); + + s = pool_elt_at_index (pg->streams, stream_index); + s_new = s[0]; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat_user (input, unformat_pg_stream_parameter, &s_new)) + ; + + else + return clib_error_create ("unknown input `%U'", + format_unformat_error, input); + } + + error = validate_stream (&s_new); + if (! 
error) + s[0] = s_new; + + return error; +} + +VLIB_CLI_COMMAND (change_stream_parameters_cli, static) = { + .path = "packet-generator configure", + .short_help = "Change packet generator stream parameters", + .function = change_stream_parameters, +}; + +/* Dummy init function so that we can be linked in. */ +static clib_error_t * pg_cli_init (vlib_main_t * vm) +{ return 0; } + +VLIB_INIT_FUNCTION (pg_cli_init); diff --git a/vnet/vnet/pg/edit.c b/vnet/vnet/pg/edit.c new file mode 100644 index 00000000000..9c49cef8903 --- /dev/null +++ b/vnet/vnet/pg/edit.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * pg_edit.c: packet generator edits + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> + +static void +pg_edit_set_value_helper (pg_edit_t * e, u64 value, u8 * result) +{ + int i, j, n_bits_left; + u8 * v, tmp[8]; + + v = tmp; + + n_bits_left = e->n_bits; + i = 0; + j = e->lsb_bit_offset % BITS (v[0]); + + if (n_bits_left > 0 && j != 0) + { + v[i] = (value & 0xff) << j; + value >>= BITS (v[0]) - j; + n_bits_left -= BITS (v[0]) - j; + i += 1; + } + + while (n_bits_left > 0) + { + v[i] = value & 0xff; + value >>= 8; + n_bits_left -= 8; + i += 1; + } + + /* Convert to network byte order. */ + for (j = 0; j < i; j++) + result[j] = v[i - 1 - j]; +} + +void +pg_edit_set_value (pg_edit_t * e, int hi_or_lo, u64 value) +{ + pg_edit_alloc_value (e, hi_or_lo); + pg_edit_set_value_helper (e, value, e->values[hi_or_lo]); +} + +/* Parse an int either %d or 0x%x into network byte order. */ +uword unformat_pg_number (unformat_input_t * input, va_list * args) +{ + u8 * result = va_arg (*args, u8 *); + pg_edit_t * e = va_arg (*args, pg_edit_t *); + u64 value; + + ASSERT (BITS (value) >= e->n_bits); + + if (! unformat (input, "0x%X", sizeof (value), &value) + && ! 
unformat (input, "%D", sizeof (value), &value)) + return 0; + + /* Number given does not fit into bit field. */ + if (e->n_bits < 64 + && value >= (u64) 1 << (u64) e->n_bits) + return 0; + + pg_edit_set_value_helper (e, value, result); + return 1; +} + +uword +unformat_pg_edit (unformat_input_t * input, va_list * args) +{ + unformat_function_t * f = va_arg (*args, unformat_function_t *); + pg_edit_t * e = va_arg (*args, pg_edit_t *); + + pg_edit_alloc_value (e, PG_EDIT_LO); + if (! unformat_user (input, f, e->values[PG_EDIT_LO], e)) + return 0; + + pg_edit_alloc_value (e, PG_EDIT_HI); + if (unformat (input, "-%U", f, e->values[PG_EDIT_HI], e)) + e->type = PG_EDIT_INCREMENT; + else if (unformat (input, "+%U", f, e->values[PG_EDIT_HI], e)) + e->type = PG_EDIT_RANDOM; + else + e->type = PG_EDIT_FIXED; + + return 1; +} + +uword +unformat_pg_payload (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_main_t * pg = &pg_main; + vlib_main_t * vm = pg->vlib_main; + pg_edit_t * e; + u32 i, node_index, len, max_len; + u8 * v; + + v = 0; + + if (unformat (input, "incrementing %d", &len)) + { + vec_resize (v, len); + for (i = 0; i < len; i++) + v[i] = i; + } + else if (unformat (input, "hex 0x%U", unformat_hex_string, &v)) + ; + + else if (unformat (input, "%U", unformat_vlib_node, vm, &node_index)) + { + pg_node_t * pn = pg_get_node (node_index); + if (! pn->unformat_edit) + return 0; + return unformat (input, "%U", pn->unformat_edit, s); + } + + else + return 0; + + /* Length not including this payload. */ + max_len = pg_edit_group_n_bytes (s, 0); + if (max_len + vec_len (v) >= s->max_packet_bytes) + { + if (s->max_packet_bytes >= max_len) + _vec_len (v) = s->max_packet_bytes - max_len; + else + _vec_len (v) = 0; + } + + e = pg_create_edit_group (s, sizeof (e[0]), vec_len (v), 0); + + e->type = PG_EDIT_FIXED; + e->n_bits = vec_len (v) * BITS (v[0]); + + /* Least significant bit is at end of bitstream, since everything is always bigendian. */ + e->lsb_bit_offset = e->n_bits - BITS (v[0]); + + e->values[PG_EDIT_LO] = v; + + return 1; +} diff --git a/vnet/vnet/pg/edit.h b/vnet/vnet/pg/edit.h new file mode 100644 index 00000000000..07c14a2a203 --- /dev/null +++ b/vnet/vnet/pg/edit.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * pg_edit.h: packet generator edits + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_packet_generator_pg_edit_h +#define included_packet_generator_pg_edit_h + +#include <vppinfra/format.h> +#include <vppinfra/vec.h> + +typedef enum { + /* Invalid type used to poison edits. */ + PG_EDIT_INVALID_TYPE, + + /* Value is fixed: does not change for all packets in sequence. */ + PG_EDIT_FIXED, + + /* Value v increments between low and high values v_low <= v <= v_high. */ + PG_EDIT_INCREMENT, + + /* Random value between low and high values v_low <= v <= v_high. */ + PG_EDIT_RANDOM, + + /* Unspecified value; will be specified by some edit function. */ + PG_EDIT_UNSPECIFIED, +} pg_edit_type_t; + +typedef struct { + pg_edit_type_t type; + + /* Bit offset within packet where value is to be written. + Bits are written in network byte order: high bits first. + This is the bit offset of the least significant bit: i.e. the + highest numbered byte * 8 plus bit offset within that byte. + Negative offsets encode special edits. */ + i32 lsb_bit_offset; + + /* Special offset indicating this edit is for packet length. */ +#define PG_EDIT_PACKET_LENGTH (-1) + + /* Number of bits in edit. */ + u32 n_bits; + + /* Low and high values for this edit. Network byte order. */ + u8 * values[2]; +#define PG_EDIT_LO 0 +#define PG_EDIT_HI 1 + + /* Last value used for increment edit type. */ + u64 last_increment_value; +} pg_edit_t; + +always_inline void +pg_edit_free (pg_edit_t * e) +{ + int i; + for (i = 0; i < ARRAY_LEN (e->values); i++) + vec_free (e->values[i]); +} + +#define pg_edit_init_bitfield(e,type,field,field_offset,field_n_bits) \ +do { \ + u32 _bo; \ + \ + ASSERT ((field_offset) < STRUCT_BITS_OF (type, field)); \ + \ + /* Start byte offset. */ \ + _bo = STRUCT_OFFSET_OF (type, field); \ + \ + /* Adjust for big endian byte order. */ \ + _bo += ((STRUCT_BITS_OF (type, field) \ + - (field_offset) - 1) / BITS (u8)); \ + \ + (e)->lsb_bit_offset = _bo * BITS (u8) + ((field_offset) % BITS (u8)); \ + (e)->n_bits = (field_n_bits); \ +} while (0) + +/* Initialize edit for byte aligned fields. 
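+   pg_edit_init covers the entire field; for sub-byte fields call
+   pg_edit_init_bitfield directly with an explicit bit offset and width,
+   e.g. pg_edit_init_bitfield (e, ip4_header_t, ip_version_and_header_length, 4, 4)
+   selects only the 4-bit version nibble of that byte.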
*/ +#define pg_edit_init(e,type,field) \ + pg_edit_init_bitfield(e,type,field,0,STRUCT_BITS_OF(type,field)) + +static inline uword +pg_edit_n_alloc_bytes (pg_edit_t * e) +{ + int i0, i1, n_bytes, n_bits_left; + + i0 = e->lsb_bit_offset; + i1 = i0 % BITS (u8); + + n_bytes = 0; + n_bits_left = e->n_bits; + + if (n_bits_left > 0 && i1 != 0) + { + n_bytes++; + n_bits_left -= i1; + if (n_bits_left < 0) + n_bits_left = 0; + } + + n_bytes += (n_bits_left / BITS (u8)); + n_bytes += (n_bits_left % BITS (u8)) != 0; + + return n_bytes; +} + +static inline void +pg_edit_alloc_value (pg_edit_t * e, int i) +{ vec_validate (e->values[i], e->lsb_bit_offset / BITS (u8)); } + +extern void pg_edit_set_value (pg_edit_t * e, int hi_or_lo, u64 value); + +static inline void +pg_edit_set_fixed (pg_edit_t * e, u64 value) +{ + e->type = PG_EDIT_FIXED; + pg_edit_set_value (e, PG_EDIT_LO, value); +} + +static inline void +pg_edit_copy_type_and_values (pg_edit_t * dst, pg_edit_t * src) +{ + int i; + dst->type = src->type; + src->type = PG_EDIT_INVALID_TYPE; + for (i = 0; i < ARRAY_LEN (dst->values); i++) + { + dst->values[i] = src->values[i]; + src->values[i] = 0; + } +} + +static inline u64 +pg_edit_get_value (pg_edit_t * e, int hi_or_lo) +{ + u64 r = 0; + int i, n; + u8 * v = e->values[hi_or_lo]; + + n = round_pow2 (e->n_bits, BITS (u8)) / BITS (u8); + + ASSERT (n <= vec_len (v)); + ASSERT (n <= sizeof (r)); + + for (i = 0; i < n; i++) + r = (r << BITS (v[i])) + v[i]; + + return r; +} + +static inline uword +pg_edit_is_fixed_with_value (pg_edit_t * e, u64 value) +{ + return (e->type == PG_EDIT_FIXED + && value == pg_edit_get_value (e, PG_EDIT_LO)); +} + +uword unformat_pg_edit (unformat_input_t * input, va_list * args); +uword unformat_pg_payload (unformat_input_t * input, va_list * args); +uword unformat_pg_number (unformat_input_t * input, va_list * args); +uword unformat_pg_interface (unformat_input_t * input, va_list * args); + +#endif /* included_packet_generator_pg_edit_h */ diff --git a/vnet/vnet/pg/example.script b/vnet/vnet/pg/example.script new file mode 100644 index 00000000000..0e29b9ecae6 --- /dev/null +++ b/vnet/vnet/pg/example.script @@ -0,0 +1,6 @@ +packet-generator new { + name x + limit 1 + node ethernet-input + data { IP: 1.2.3 -> 4.5.6 incrementing 100 } +} diff --git a/vnet/vnet/pg/init.c b/vnet/vnet/pg/init.c new file mode 100644 index 00000000000..f598050c28d --- /dev/null +++ b/vnet/vnet/pg/init.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * pg_init.c: VLIB packet generator + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> + +/* Global main structure. */ +pg_main_t pg_main; + +static clib_error_t * pg_init (vlib_main_t * vm) +{ + clib_error_t * error; + pg_main_t * pg = &pg_main; + int i, j; + + pg->vlib_main = vm; + + if ((error = vlib_call_init_function (vm, vnet_main_init))) + goto done; + + if ((error = vlib_call_init_function (vm, pg_cli_init))) + goto done; + + /* Create/free interfaces so that they exist and can be + used as a destination interface for streams. Also, create + a fixed number of pg interfaces so that interface numbering can + be made to be deterministic (at least if <= 4 streams are ever used). */ + for (i = 0; i < 4; i++) + { + j = pg_interface_find_free (pg, i); + ASSERT (j == i); + } + + /* Free interfaces. */ + for (i = j; i >= 0; i--) + vec_add1 (pg->free_interfaces, i); + + done: + return error; +} + +VLIB_INIT_FUNCTION (pg_init); diff --git a/vnet/vnet/pg/input.c b/vnet/vnet/pg/input.c new file mode 100644 index 00000000000..4ec61ca7b96 --- /dev/null +++ b/vnet/vnet/pg/input.c @@ -0,0 +1,1745 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * pg_input.c: buffer generator input + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/vnet.h> + +#if DPDK==1 +#include <vnet/devices/dpdk/dpdk.h> +#endif + +static inline void +pg_set_mbuf_metadata (pg_main_t * pg, u32 * buffers, u32 n_alloc) +{ +#if DPDK == 1 + vlib_main_t * vm = vlib_get_main(); + vlib_buffer_t * b; + struct rte_mbuf * mb; + i16 delta; + u16 new_data_len; + u16 new_pkt_len; + + int i; + + for (i = 0; i < n_alloc; i++) + { + b = vlib_get_buffer (vm, buffers[i]); + mb = ((struct rte_mbuf *)b) - 1; + + delta = vlib_buffer_length_in_chain (vm, b) - (i16) mb->pkt_len; + new_data_len = (u16)((i16) mb->data_len + delta); + new_pkt_len = (u16)((i16) mb->pkt_len + delta); + + mb->data_len = new_data_len; + mb->pkt_len = new_pkt_len; + mb->data_off = (u16)((RTE_PKTMBUF_HEADROOM) + b->current_data); + } +#endif +} + +static int +validate_buffer_data2 (vlib_buffer_t * b, pg_stream_t * s, + u32 data_offset, u32 n_bytes) +{ + u8 * bd, * pd, * pm; + u32 i; + + bd = b->data; + pd = s->fixed_packet_data + data_offset; + pm = s->fixed_packet_data_mask + data_offset; + + if (pd + n_bytes >= vec_end (s->fixed_packet_data)) + n_bytes = (pd < vec_end (s->fixed_packet_data) + ? 
vec_end (s->fixed_packet_data) - pd + : 0); + + for (i = 0; i < n_bytes; i++) + if ((bd[i] & pm[i]) != pd[i]) + break; + + if (i >= n_bytes) + return 1; + + clib_warning ("buffer %U", format_vlib_buffer, b); + clib_warning ("differ at index %d", i); + clib_warning ("is %U", format_hex_bytes, bd, n_bytes); + clib_warning ("mask %U", format_hex_bytes, pm, n_bytes); + clib_warning ("expect %U", format_hex_bytes, pd, n_bytes); + return 0; +} + +static int +validate_buffer_data (vlib_buffer_t * b, pg_stream_t * s) +{ return validate_buffer_data2 (b, s, 0, s->buffer_bytes); } + +always_inline void +set_1 (void * a0, + u64 v0, + u64 v_min, u64 v_max, + u32 n_bits, + u32 is_net_byte_order) +{ + ASSERT (v0 >= v_min && v0 <= v_max); + if (n_bits == BITS (u8)) + { + ((u8 *) a0)[0] = v0; + } + else if (n_bits == BITS (u16)) + { + if (is_net_byte_order) + v0 = clib_host_to_net_u16 (v0); + clib_mem_unaligned (a0, u16) = v0; + } + else if (n_bits == BITS (u32)) + { + if (is_net_byte_order) + v0 = clib_host_to_net_u32 (v0); + clib_mem_unaligned (a0, u32) = v0; + } + else if (n_bits == BITS (u64)) + { + if (is_net_byte_order) + v0 = clib_host_to_net_u64 (v0); + clib_mem_unaligned (a0, u64) = v0; + } +} + +always_inline void +set_2 (void * a0, void * a1, + u64 v0, u64 v1, + u64 v_min, u64 v_max, + u32 n_bits, + u32 is_net_byte_order, + u32 is_increment) +{ + ASSERT (v0 >= v_min && v0 <= v_max); + ASSERT (v1 >= v_min && v1 <= (v_max + is_increment)); + if (n_bits == BITS (u8)) + { + ((u8 *) a0)[0] = v0; + ((u8 *) a1)[0] = v1; + } + else if (n_bits == BITS (u16)) + { + if (is_net_byte_order) + { + v0 = clib_host_to_net_u16 (v0); + v1 = clib_host_to_net_u16 (v1); + } + clib_mem_unaligned (a0, u16) = v0; + clib_mem_unaligned (a1, u16) = v1; + } + else if (n_bits == BITS (u32)) + { + if (is_net_byte_order) + { + v0 = clib_host_to_net_u32 (v0); + v1 = clib_host_to_net_u32 (v1); + } + clib_mem_unaligned (a0, u32) = v0; + clib_mem_unaligned (a1, u32) = v1; + } + else if (n_bits == BITS (u64)) + { + if (is_net_byte_order) + { + v0 = clib_host_to_net_u64 (v0); + v1 = clib_host_to_net_u64 (v1); + } + clib_mem_unaligned (a0, u64) = v0; + clib_mem_unaligned (a1, u64) = v1; + } +} + +static_always_inline void +do_set_fixed (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 n_bits, + u32 byte_offset, + u32 is_net_byte_order, + u64 v_min, u64 v_max) + +{ + vlib_main_t * vm = pg->vlib_main; + + while (n_buffers >= 4) + { + vlib_buffer_t * b0, * b1, * b2, * b3; + void * a0, * a1; + + b0 = vlib_get_buffer (vm, buffers[0]); + b1 = vlib_get_buffer (vm, buffers[1]); + b2 = vlib_get_buffer (vm, buffers[2]); + b3 = vlib_get_buffer (vm, buffers[3]); + buffers += 2; + n_buffers -= 2; + + a0 = (void *) b0 + byte_offset; + a1 = (void *) b1 + byte_offset; + CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE); + CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE); + + set_2 (a0, a1, v_min, v_min, + v_min, v_max, + n_bits, is_net_byte_order, + /* is_increment */ 0); + + ASSERT (validate_buffer_data (b0, s)); + ASSERT (validate_buffer_data (b1, s)); + } + + while (n_buffers > 0) + { + vlib_buffer_t * b0; + void * a0; + + b0 = vlib_get_buffer (vm, buffers[0]); + buffers += 1; + n_buffers -= 1; + + a0 = (void *) b0 + byte_offset; + + set_1 (a0, v_min, + v_min, v_max, + n_bits, is_net_byte_order); + + ASSERT (validate_buffer_data (b0, s)); + } +} + +static_always_inline u64 +do_set_increment (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 n_bits, + u32 
byte_offset, + u32 is_net_byte_order, + u32 want_sum, + u64 * sum_result, + u64 v_min, u64 v_max, + u64 v) +{ + vlib_main_t * vm = pg->vlib_main; + u64 sum = 0; + + ASSERT (v >= v_min && v <= v_max); + + while (n_buffers >= 4) + { + vlib_buffer_t * b0, * b1, * b2, * b3; + void * a0, * a1; + u64 v_old; + + b0 = vlib_get_buffer (vm, buffers[0]); + b1 = vlib_get_buffer (vm, buffers[1]); + b2 = vlib_get_buffer (vm, buffers[2]); + b3 = vlib_get_buffer (vm, buffers[3]); + buffers += 2; + n_buffers -= 2; + + a0 = (void *) b0 + byte_offset; + a1 = (void *) b1 + byte_offset; + CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE); + CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE); + + v_old = v; + v = v_old + 2; + v = v > v_max ? v_min : v; + set_2 (a0, a1, + v_old + 0, v_old + 1, + v_min, v_max, + n_bits, is_net_byte_order, + /* is_increment */ 1); + + if (want_sum) + sum += 2*v_old + 1; + + if (PREDICT_FALSE (v_old + 1 > v_max)) + { + if (want_sum) + sum -= 2*v_old + 1; + + v = v_old; + set_1 (a0, v + 0, v_min, v_max, n_bits, is_net_byte_order); + if (want_sum) + sum += v; + v += 1; + + v = v > v_max ? v_min : v; + set_1 (a1, v + 0, v_min, v_max, n_bits, is_net_byte_order); + if (want_sum) + sum += v; + v += 1; + } + + ASSERT (validate_buffer_data (b0, s)); + ASSERT (validate_buffer_data (b1, s)); + } + + while (n_buffers > 0) + { + vlib_buffer_t * b0; + void * a0; + u64 v_old; + + b0 = vlib_get_buffer (vm, buffers[0]); + buffers += 1; + n_buffers -= 1; + + a0 = (void *) b0 + byte_offset; + + v_old = v; + if (want_sum) + sum += v_old; + v += 1; + v = v > v_max ? v_min : v; + + ASSERT (v_old >= v_min && v_old <= v_max); + set_1 (a0, v_old, v_min, v_max, n_bits, is_net_byte_order); + + ASSERT (validate_buffer_data (b0, s)); + } + + if (want_sum) + *sum_result = sum; + + return v; +} + +static_always_inline void +do_set_random (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 n_bits, + u32 byte_offset, + u32 is_net_byte_order, + u32 want_sum, + u64 * sum_result, + u64 v_min, u64 v_max) + +{ + vlib_main_t * vm = pg->vlib_main; + u64 v_diff = v_max - v_min + 1; + u64 r_mask = max_pow2 (v_diff) - 1; + u64 v0, v1; + u64 sum = 0; + void * random_data; + + random_data = clib_random_buffer_get_data + (&vm->random_buffer, n_buffers * n_bits / BITS (u8)); + + v0 = v1 = v_min; + + while (n_buffers >= 4) + { + vlib_buffer_t * b0, * b1, * b2, * b3; + void * a0, * a1; + u64 r0=0, r1=0; /* warnings be gone */ + + b0 = vlib_get_buffer (vm, buffers[0]); + b1 = vlib_get_buffer (vm, buffers[1]); + b2 = vlib_get_buffer (vm, buffers[2]); + b3 = vlib_get_buffer (vm, buffers[3]); + buffers += 2; + n_buffers -= 2; + + a0 = (void *) b0 + byte_offset; + a1 = (void *) b1 + byte_offset; + CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE); + CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE); + + switch (n_bits) + { +#define _(n) \ + case BITS (u##n): \ + { \ + u##n * r = random_data; \ + r0 = r[0]; \ + r1 = r[1]; \ + random_data = r + 2; \ + } \ + break; + + _ (8); + _ (16); + _ (32); + _ (64); + +#undef _ + } + + /* Add power of 2 sized random number which may be out of range. */ + v0 += r0 & r_mask; + v1 += r1 & r_mask; + + /* Twice should be enough to reduce to v_min .. v_max range. */ + v0 = v0 > v_max ? v0 - v_diff : v0; + v1 = v1 > v_max ? v1 - v_diff : v1; + v0 = v0 > v_max ? v0 - v_diff : v0; + v1 = v1 > v_max ? 
v1 - v_diff : v1; + + if (want_sum) + sum += v0 + v1; + + set_2 (a0, a1, + v0, v1, + v_min, v_max, + n_bits, is_net_byte_order, + /* is_increment */ 0); + + ASSERT (validate_buffer_data (b0, s)); + ASSERT (validate_buffer_data (b1, s)); + } + + while (n_buffers > 0) + { + vlib_buffer_t * b0; + void * a0; + u64 r0 = 0; /* warnings be gone */ + + b0 = vlib_get_buffer (vm, buffers[0]); + buffers += 1; + n_buffers -= 1; + + a0 = (void *) b0 + byte_offset; + + switch (n_bits) + { +#define _(n) \ + case BITS (u##n): \ + { \ + u##n * r = random_data; \ + r0 = r[0]; \ + random_data = r + 1; \ + } \ + break; + + _ (8); + _ (16); + _ (32); + _ (64); + +#undef _ + } + + /* Add power of 2 sized random number which may be out of range. */ + v0 += r0 & r_mask; + + /* Twice should be enough to reduce to v_min .. v_max range. */ + v0 = v0 > v_max ? v0 - v_diff : v0; + v0 = v0 > v_max ? v0 - v_diff : v0; + + if (want_sum) + sum += v0; + + set_1 (a0, v0, v_min, v_max, n_bits, is_net_byte_order); + + ASSERT (validate_buffer_data (b0, s)); + } + + if (want_sum) + *sum_result = sum; +} + +#define _(i,t) \ + clib_mem_unaligned (a##i, t) = \ + clib_host_to_net_##t ((clib_net_to_host_mem_##t (a##i) &~ mask) \ + | (v##i << shift)) + +always_inline void +setbits_1 (void * a0, + u64 v0, + u64 v_min, u64 v_max, + u32 max_bits, + u32 n_bits, + u64 mask, + u32 shift) +{ + ASSERT (v0 >= v_min && v0 <= v_max); + if (max_bits == BITS (u8)) + ((u8 *) a0)[0] = (((u8 *) a0)[0] &~ mask) | (v0 << shift); + + else if (max_bits == BITS (u16)) + { + _ (0, u16); + } + else if (max_bits == BITS (u32)) + { + _ (0, u32); + } + else if (max_bits == BITS (u64)) + { + _ (0, u64); + } +} + +always_inline void +setbits_2 (void * a0, void * a1, + u64 v0, u64 v1, + u64 v_min, u64 v_max, + u32 max_bits, + u32 n_bits, + u64 mask, + u32 shift, + u32 is_increment) +{ + ASSERT (v0 >= v_min && v0 <= v_max); + ASSERT (v1 >= v_min && v1 <= v_max + is_increment); + if (max_bits == BITS (u8)) + { + ((u8 *) a0)[0] = (((u8 *) a0)[0] &~ mask) | (v0 << shift); + ((u8 *) a1)[0] = (((u8 *) a1)[0] &~ mask) | (v1 << shift); + } + + else if (max_bits == BITS (u16)) + { + _ (0, u16); + _ (1, u16); + } + else if (max_bits == BITS (u32)) + { + _ (0, u32); + _ (1, u32); + } + else if (max_bits == BITS (u64)) + { + _ (0, u64); + _ (1, u64); + } +} + +#undef _ + +static_always_inline void +do_setbits_fixed (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 max_bits, + u32 n_bits, + u32 byte_offset, + u64 v_min, u64 v_max, + u64 mask, + u32 shift) + +{ + vlib_main_t * vm = pg->vlib_main; + + while (n_buffers >= 4) + { + vlib_buffer_t * b0, * b1, * b2, * b3; + void * a0, * a1; + + b0 = vlib_get_buffer (vm, buffers[0]); + b1 = vlib_get_buffer (vm, buffers[1]); + b2 = vlib_get_buffer (vm, buffers[2]); + b3 = vlib_get_buffer (vm, buffers[3]); + buffers += 2; + n_buffers -= 2; + + a0 = (void *) b0 + byte_offset; + a1 = (void *) b1 + byte_offset; + CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE); + CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE); + + setbits_2 (a0, a1, + v_min, v_min, + v_min, v_max, + max_bits, n_bits, mask, shift, + /* is_increment */ 0); + + ASSERT (validate_buffer_data (b0, s)); + ASSERT (validate_buffer_data (b1, s)); + } + + while (n_buffers > 0) + { + vlib_buffer_t * b0; + void * a0; + + b0 = vlib_get_buffer (vm, buffers[0]); + buffers += 1; + n_buffers -= 1; + + a0 = (void *) b0 + byte_offset; + + setbits_1 (a0, v_min, v_min, v_max, max_bits, n_bits, mask, shift); + ASSERT 
(validate_buffer_data (b0, s)); + } +} + +static_always_inline u64 +do_setbits_increment (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 max_bits, + u32 n_bits, + u32 byte_offset, + u64 v_min, u64 v_max, + u64 v, + u64 mask, + u32 shift) +{ + vlib_main_t * vm = pg->vlib_main; + + ASSERT (v >= v_min && v <= v_max); + + while (n_buffers >= 4) + { + vlib_buffer_t * b0, * b1, * b2, * b3; + void * a0, * a1; + u64 v_old; + + b0 = vlib_get_buffer (vm, buffers[0]); + b1 = vlib_get_buffer (vm, buffers[1]); + b2 = vlib_get_buffer (vm, buffers[2]); + b3 = vlib_get_buffer (vm, buffers[3]); + buffers += 2; + n_buffers -= 2; + + a0 = (void *) b0 + byte_offset; + a1 = (void *) b1 + byte_offset; + CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE); + CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE); + + v_old = v; + v = v_old + 2; + v = v > v_max ? v_min : v; + setbits_2 (a0, a1, + v_old + 0, v_old + 1, + v_min, v_max, + max_bits, n_bits, mask, shift, + /* is_increment */ 1); + + if (PREDICT_FALSE (v_old + 1 > v_max)) + { + v = v_old; + setbits_1 (a0, v + 0, v_min, v_max, max_bits, n_bits, mask, shift); + v += 1; + + v = v > v_max ? v_min : v; + setbits_1 (a1, v + 0, v_min, v_max, max_bits, n_bits, mask, shift); + v += 1; + } + ASSERT (validate_buffer_data (b0, s)); + ASSERT (validate_buffer_data (b1, s)); + } + + while (n_buffers > 0) + { + vlib_buffer_t * b0; + void * a0; + u64 v_old; + + b0 = vlib_get_buffer (vm, buffers[0]); + buffers += 1; + n_buffers -= 1; + + a0 = (void *) b0 + byte_offset; + + v_old = v; + v = v_old + 1; + v = v > v_max ? v_min : v; + + ASSERT (v_old >= v_min && v_old <= v_max); + setbits_1 (a0, v_old, v_min, v_max, max_bits, n_bits, mask, shift); + + ASSERT (validate_buffer_data (b0, s)); + } + + return v; +} + +static_always_inline void +do_setbits_random (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 max_bits, + u32 n_bits, + u32 byte_offset, + u64 v_min, u64 v_max, + u64 mask, + u32 shift) +{ + vlib_main_t * vm = pg->vlib_main; + u64 v_diff = v_max - v_min + 1; + u64 r_mask = max_pow2 (v_diff) - 1; + u64 v0, v1; + void * random_data; + + random_data = clib_random_buffer_get_data + (&vm->random_buffer, n_buffers * max_bits / BITS (u8)); + v0 = v1 = v_min; + + while (n_buffers >= 4) + { + vlib_buffer_t * b0, * b1, * b2, * b3; + void * a0, * a1; + u64 r0=0, r1=0; /* warnings be gone */ + + b0 = vlib_get_buffer (vm, buffers[0]); + b1 = vlib_get_buffer (vm, buffers[1]); + b2 = vlib_get_buffer (vm, buffers[2]); + b3 = vlib_get_buffer (vm, buffers[3]); + buffers += 2; + n_buffers -= 2; + + a0 = (void *) b0 + byte_offset; + a1 = (void *) b1 + byte_offset; + CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE); + CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE); + + switch (max_bits) + { +#define _(n) \ + case BITS (u##n): \ + { \ + u##n * r = random_data; \ + r0 = r[0]; \ + r1 = r[1]; \ + random_data = r + 2; \ + } \ + break; + + _ (8); + _ (16); + _ (32); + _ (64); + +#undef _ + } + + /* Add power of 2 sized random number which may be out of range. */ + v0 += r0 & r_mask; + v1 += r1 & r_mask; + + /* Twice should be enough to reduce to v_min .. v_max range. */ + v0 = v0 > v_max ? v0 - v_diff : v0; + v1 = v1 > v_max ? v1 - v_diff : v1; + v0 = v0 > v_max ? v0 - v_diff : v0; + v1 = v1 > v_max ? 
v1 - v_diff : v1; + + setbits_2 (a0, a1, + v0, v1, + v_min, v_max, + max_bits, n_bits, mask, shift, + /* is_increment */ 0); + + ASSERT (validate_buffer_data (b0, s)); + ASSERT (validate_buffer_data (b1, s)); + } + + while (n_buffers > 0) + { + vlib_buffer_t * b0; + void * a0; + u64 r0 = 0; /* warnings be gone */ + + b0 = vlib_get_buffer (vm, buffers[0]); + buffers += 1; + n_buffers -= 1; + + a0 = (void *) b0 + byte_offset; + + switch (max_bits) + { +#define _(n) \ + case BITS (u##n): \ + { \ + u##n * r = random_data; \ + r0 = r[0]; \ + random_data = r + 1; \ + } \ + break; + + _ (8); + _ (16); + _ (32); + _ (64); + +#undef _ + } + + /* Add power of 2 sized random number which may be out of range. */ + v0 += r0 & r_mask; + + /* Twice should be enough to reduce to v_min .. v_max range. */ + v0 = v0 > v_max ? v0 - v_diff : v0; + v0 = v0 > v_max ? v0 - v_diff : v0; + + setbits_1 (a0, v0, v_min, v_max, max_bits, n_bits, mask, shift); + + ASSERT (validate_buffer_data (b0, s)); + } +} + +static u64 do_it (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 lo_bit, u32 hi_bit, + u64 v_min, u64 v_max, + u64 v, + pg_edit_type_t edit_type) +{ + u32 max_bits, l0, l1, h1, start_bit; + + if (v_min == v_max) + edit_type = PG_EDIT_FIXED; + + l0 = lo_bit / BITS (u8); + l1 = lo_bit % BITS (u8); + h1 = hi_bit % BITS (u8); + + start_bit = l0 * BITS (u8); + + max_bits = hi_bit - start_bit; + ASSERT (max_bits <= 64); + +#define _(n) \ + case (n): \ + if (edit_type == PG_EDIT_INCREMENT) \ + v = do_set_increment (pg, s, buffers, n_buffers, \ + BITS (u##n), \ + l0, \ + /* is_net_byte_order */ 1, \ + /* want sum */ 0, 0, \ + v_min, v_max, \ + v); \ + else if (edit_type == PG_EDIT_RANDOM) \ + do_set_random (pg, s, buffers, n_buffers, \ + BITS (u##n), \ + l0, \ + /* is_net_byte_order */ 1, \ + /* want sum */ 0, 0, \ + v_min, v_max); \ + else /* edit_type == PG_EDIT_FIXED */ \ + do_set_fixed (pg, s, buffers, n_buffers, \ + BITS (u##n), \ + l0, \ + /* is_net_byte_order */ 1, \ + v_min, v_max); \ + goto done; + + if (l1 == 0 && h1 == 0) + { + switch (max_bits) + { + _ (8); + _ (16); + _ (32); + _ (64); + } + } + +#undef _ + + { + u64 mask; + u32 shift = l1; + u32 n_bits = max_bits; + + max_bits = clib_max (max_pow2 (n_bits), 8); + + mask = ((u64) 1 << (u64) n_bits) - 1; + mask &= ~(((u64) 1 << (u64) shift) - 1); + + mask <<= max_bits - n_bits; + shift += max_bits - n_bits; + + switch (max_bits) + { +#define _(n) \ + case (n): \ + if (edit_type == PG_EDIT_INCREMENT) \ + v = do_setbits_increment (pg, s, buffers, n_buffers, \ + BITS (u##n), n_bits, \ + l0, v_min, v_max, v, \ + mask, shift); \ + else if (edit_type == PG_EDIT_RANDOM) \ + do_setbits_random (pg, s, buffers, n_buffers, \ + BITS (u##n), n_bits, \ + l0, v_min, v_max, \ + mask, shift); \ + else /* edit_type == PG_EDIT_FIXED */ \ + do_setbits_fixed (pg, s, buffers, n_buffers, \ + BITS (u##n), n_bits, \ + l0, v_min, v_max, \ + mask, shift); \ + goto done; + + _ (8); + _ (16); + _ (32); + _ (64); + +#undef _ + } + } + + done: + return v; +} + +static void +pg_generate_set_lengths (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers) +{ + u64 v_min, v_max, length_sum; + pg_edit_type_t edit_type; + + v_min = s->min_packet_bytes; + v_max = s->max_packet_bytes; + edit_type = s->packet_size_edit_type; + + if (edit_type == PG_EDIT_INCREMENT) + s->last_increment_packet_size + = do_set_increment (pg, s, buffers, n_buffers, + 8 * STRUCT_SIZE_OF (vlib_buffer_t, current_length), + STRUCT_OFFSET_OF (vlib_buffer_t, current_length), + /* 
is_net_byte_order */ 0, + /* want sum */ 1, &length_sum, + v_min, v_max, + s->last_increment_packet_size); + + else if (edit_type == PG_EDIT_RANDOM) + do_set_random (pg, s, buffers, n_buffers, + 8 * STRUCT_SIZE_OF (vlib_buffer_t, current_length), + STRUCT_OFFSET_OF (vlib_buffer_t, current_length), + /* is_net_byte_order */ 0, + /* want sum */ 1, &length_sum, + v_min, v_max); + + else /* edit_type == PG_EDIT_FIXED */ + { + do_set_fixed (pg, s, buffers, n_buffers, + 8 * STRUCT_SIZE_OF (vlib_buffer_t, current_length), + STRUCT_OFFSET_OF (vlib_buffer_t, current_length), + /* is_net_byte_order */ 0, + v_min, v_max); + length_sum = v_min * n_buffers; + } + + { + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, s->sw_if_index[VLIB_RX]); + + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number(), + si->sw_if_index, + n_buffers, + length_sum); + } + + pg_set_mbuf_metadata (pg, buffers, n_buffers); +} + +static void +pg_generate_fix_multi_buffer_lengths (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers) +{ + vlib_main_t * vm = pg->vlib_main; + pg_buffer_index_t * pbi; + uword n_bytes_left; + static u32 * unused_buffers = 0; + + while (n_buffers > 0) + { + vlib_buffer_t * b; + u32 bi; + + bi = buffers[0]; + b = vlib_get_buffer (vm, bi); + + /* Current length here is length of whole packet. */ + n_bytes_left = b->current_length; + + pbi = s->buffer_indices; + while (1) + { + uword n = clib_min (n_bytes_left, s->buffer_bytes); + + b->current_length = n; + n_bytes_left -= n; + if (n_bytes_left > 0) + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + else + b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; + + /* Return unused buffers to fifos. */ + if (n == 0) + vec_add1 (unused_buffers, bi); + + pbi++; + if (pbi >= vec_end (s->buffer_indices)) + break; + + bi = b->next_buffer; + b = vlib_get_buffer (vm, bi); + } + ASSERT (n_bytes_left == 0); + + buffers += 1; + n_buffers -= 1; + } + + if (vec_len (unused_buffers) > 0) + { + vlib_buffer_free_no_next (vm, unused_buffers, + vec_len (unused_buffers)); + _vec_len (unused_buffers) = 0; + } +} + +static void +pg_generate_edit (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers) +{ + pg_edit_t * e; + + vec_foreach (e, s->non_fixed_edits) + { + switch (e->type) + { + case PG_EDIT_RANDOM: + case PG_EDIT_INCREMENT: + { + u32 lo_bit, hi_bit; + u64 v_min, v_max; + + v_min = pg_edit_get_value (e, PG_EDIT_LO); + v_max = pg_edit_get_value (e, PG_EDIT_HI); + + hi_bit = (BITS (u8) * STRUCT_OFFSET_OF (vlib_buffer_t, data) + + BITS (u8) + + e->lsb_bit_offset); + lo_bit = hi_bit - e->n_bits; + + e->last_increment_value + = do_it (pg, s, buffers, n_buffers, lo_bit, hi_bit, v_min, v_max, + e->last_increment_value, + e->type); + } + break; + + case PG_EDIT_UNSPECIFIED: + break; + + default: + /* Should not be any fixed edits left. */ + ASSERT (0); + break; + } + } + + /* Call any edit functions to e.g. completely IP lengths, checksums, ... 
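+     Each edit group may carry an edit_function, typically installed by the
+     protocol's unformat handler, which fills in values that depend on the
+     finished packet (IP length and checksum fields, for instance); the loop
+     below visits the groups from innermost (last created) to outermost.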
*/ + { + int i; + for (i = vec_len (s->edit_groups) - 1; i >= 0; i--) + { + pg_edit_group_t * g = s->edit_groups + i; + if (g->edit_function) + g->edit_function (pg, s, g, buffers, n_buffers); + } + } +} + +static void +pg_set_next_buffer_pointers (pg_main_t * pg, + pg_stream_t * s, + u32 * buffers, + u32 * next_buffers, + u32 n_buffers) +{ + vlib_main_t * vm = pg->vlib_main; + + while (n_buffers >= 4) + { + u32 ni0, ni1; + vlib_buffer_t * b0, * b1; + + b0 = vlib_get_buffer (vm, buffers[0]); + b1 = vlib_get_buffer (vm, buffers[1]); + ni0 = next_buffers[0]; + ni1 = next_buffers[1]; + + vlib_prefetch_buffer_with_index (vm, buffers[2], WRITE); + vlib_prefetch_buffer_with_index (vm, buffers[3], WRITE); + + b0->flags |= VLIB_BUFFER_NEXT_PRESENT; + b1->flags |= VLIB_BUFFER_NEXT_PRESENT; + b0->next_buffer = ni0; + b1->next_buffer = ni1; + + buffers += 2; + next_buffers += 2; + n_buffers -= 2; + } + + while (n_buffers > 0) + { + u32 ni0; + vlib_buffer_t * b0; + + b0 = vlib_get_buffer (vm, buffers[0]); + ni0 = next_buffers[0]; + buffers += 1; + next_buffers += 1; + n_buffers -= 1; + + b0->flags |= VLIB_BUFFER_NEXT_PRESENT; + b0->next_buffer = ni0; + } +} + +static_always_inline void +init_replay_buffers_inline (vlib_main_t * vm, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 data_offset, + u32 n_data) +{ + u32 n_left, * b, i, l; + + n_left = n_buffers; + b = buffers; + i = s->current_replay_packet_index; + l = vec_len (s->replay_packet_templates); + + while (n_left >= 1) + { + u32 bi0, n0; + vlib_buffer_t * b0; + u8 * d0; + + bi0 = b[0]; + b += 1; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX]; + /* was s->sw_if_index[VLIB_TX]; */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; + + d0 = vec_elt (s->replay_packet_templates, i); + + n0 = n_data; + if (data_offset + n_data >= vec_len (d0)) + n0 = vec_len (d0) > data_offset ? vec_len (d0) - data_offset : 0; + + b0->current_length = n0; + + memcpy (b0->data, d0 + data_offset, n0); + i = i + 1 == l ? 0 : i + 1; + } +} + +static_always_inline void +init_buffers_inline (vlib_main_t * vm, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers, + u32 data_offset, + u32 n_data, + u32 set_data) +{ + u32 n_left, * b; + u8 * data, * mask; + + if (vec_len (s->replay_packet_templates) > 0) + return init_replay_buffers_inline (vm, s, buffers, n_buffers, data_offset, n_data); + + data = s->fixed_packet_data + data_offset; + mask = s->fixed_packet_data_mask + data_offset; + if (data + n_data >= vec_end (s->fixed_packet_data)) + n_data = (data < vec_end (s->fixed_packet_data) + ? vec_end (s->fixed_packet_data) - data + : 0); + if (n_data > 0) + { + ASSERT (data + n_data <= vec_end (s->fixed_packet_data)); + ASSERT (mask + n_data <= vec_end (s->fixed_packet_data_mask)); + } + + n_left = n_buffers; + b = buffers; + + while (n_left >= 4) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + + /* Prefetch next iteration. 
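+	 b[2] and b[3] are pulled into cache with STORE intent while b[0] and
+	 b[1] are initialized below, hiding the latency of fetching their
+	 vlib_buffer_t headers.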
*/ + vlib_prefetch_buffer_with_index (vm, b[2], STORE); + vlib_prefetch_buffer_with_index (vm, b[3], STORE); + + bi0 = b[0]; + bi1 = b[1]; + b += 2; + n_left -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + vnet_buffer (b1)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX]; + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32)~0; + + if (set_data) + { + memcpy (b0->data, data, n_data); + memcpy (b1->data, data, n_data); + } + else + { + ASSERT (validate_buffer_data2 (b0, s, data_offset, n_data)); + ASSERT (validate_buffer_data2 (b1, s, data_offset, n_data)); + } + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + + bi0 = b[0]; + b += 1; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + vnet_buffer (b0)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX]; + /* s->sw_if_index[VLIB_TX]; */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; + + if (set_data) + memcpy (b0->data, data, n_data); + else + ASSERT (validate_buffer_data2 (b0, s, data_offset, n_data)); + } +} + +static void pg_buffer_init (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, + u32 * buffers, + u32 n_buffers) +{ + pg_main_t * pg = &pg_main; + pg_stream_t * s; + uword bi, si; + + si = fl->buffer_init_function_opaque & pow2_mask (24); + bi = fl->buffer_init_function_opaque >> 24; + + s = pool_elt_at_index (pg->streams, si); + + init_buffers_inline (vm, s, buffers, n_buffers, + /* data_offset */ bi * s->buffer_bytes, + /* n_data */ s->buffer_bytes, + /* set_data */ 1); +} + +static u32 +pg_stream_fill_helper (pg_main_t * pg, + pg_stream_t * s, + pg_buffer_index_t * bi, + u32 * buffers, + u32 * next_buffers, + u32 n_alloc) +{ + vlib_main_t * vm = pg->vlib_main; + vlib_buffer_free_list_t * f; + uword is_start_of_packet = bi == s->buffer_indices; + u32 n_allocated; + + f = vlib_buffer_get_free_list (vm, bi->free_list_index); + + /* + * Historically, the pg maintained its own free lists and + * device drivers tx paths would return pkts. With the DPDK, + * that doesn't happen. + */ + if (DPDK == 0 && ! (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) + f->buffer_init_function = pg_buffer_init; + f->buffer_init_function_opaque = + (s - pg->streams) | ((bi - s->buffer_indices) << 24); + + if (is_start_of_packet) + vnet_buffer (&f->buffer_init_template)->sw_if_index[VLIB_RX] + = vnet_main.local_interface_sw_if_index; + + n_allocated = vlib_buffer_alloc_from_free_list (vm, + buffers, + n_alloc, + bi->free_list_index); + if (n_allocated == 0) + return 0; + + /* + * We can't assume we got all the buffers we asked for... + * This never worked until recently. 
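+   * From here on n_alloc is the count actually allocated; all subsequent
+   * buffer initialization, length/edit generation and interface counters
+   * use that count.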
+ */ + n_alloc = n_allocated; + + /* Reinitialize buffers */ + if (DPDK == 0 || CLIB_DEBUG > 0 + || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) + init_buffers_inline + (vm, s, + buffers, + n_alloc, + (bi - s->buffer_indices) * s->buffer_bytes /* data offset */, + s->buffer_bytes, + /* set_data */ + DPDK == 1 || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE) != 0); + + /* $$$ this doesn't work at the moment */ + ASSERT(next_buffers == 0); + if (next_buffers) + pg_set_next_buffer_pointers (pg, s, buffers, next_buffers, n_alloc); + + if (is_start_of_packet) + { + if (vec_len (s->replay_packet_templates) > 0) + { + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + vnet_sw_interface_t * si = + vnet_get_sw_interface (vnm, s->sw_if_index[VLIB_RX]); + u32 l = 0; + u32 i; + for (i = 0; i < n_alloc; i++) + l += vlib_buffer_index_length_in_chain (vm, buffers[i]); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number(), + si->sw_if_index, + n_alloc, + l); + s->current_replay_packet_index += n_alloc; + s->current_replay_packet_index %= + vec_len (s->replay_packet_templates); + } + else + { + pg_generate_set_lengths (pg, s, buffers, n_alloc); + if (vec_len (s->buffer_indices) > 1) + pg_generate_fix_multi_buffer_lengths (pg, s, buffers, n_alloc); + + pg_generate_edit (pg, s, buffers, n_alloc); + } + } + + return n_alloc; +} + +static u32 +pg_stream_fill (pg_main_t * pg, pg_stream_t * s, u32 n_buffers) +{ + pg_buffer_index_t * bi; + word i, n_in_fifo, n_alloc, n_free, n_added; + u32 * tail, * start, * end, * last_tail, * last_start; + + bi = s->buffer_indices; + + n_in_fifo = clib_fifo_elts (bi->buffer_fifo); + if (n_in_fifo >= n_buffers) + return n_in_fifo; + + n_alloc = n_buffers - n_in_fifo; + + /* Round up, but never generate more than limit. */ + n_alloc = clib_max (VLIB_FRAME_SIZE, n_alloc); + + if (s->n_packets_limit > 0 + && s->n_packets_generated + n_in_fifo + n_alloc >= s->n_packets_limit) + { + n_alloc = s->n_packets_limit - s->n_packets_generated - n_in_fifo; + if (n_alloc < 0) + n_alloc = 0; + } + + /* All buffer fifos should have the same size. 
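+     In debug images the block below walks each per-buffer-index fifo and
+     asserts that they all contain the same number of elements.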
*/ + if (CLIB_DEBUG > 0) + { + uword l = ~0, e; + vec_foreach (bi, s->buffer_indices) + { + e = clib_fifo_elts (bi->buffer_fifo); + if (bi == s->buffer_indices) + l = e; + ASSERT (l == e); + } + } + + last_tail = last_start = 0; + n_added = n_alloc; + + for (i = vec_len (s->buffer_indices) - 1; i >= 0; i--) + { + bi = vec_elt_at_index (s->buffer_indices, i); + + n_free = clib_fifo_free_elts (bi->buffer_fifo); + if (n_free < n_alloc) + clib_fifo_resize (bi->buffer_fifo, n_alloc - n_free); + + tail = clib_fifo_advance_tail (bi->buffer_fifo, n_alloc); + start = bi->buffer_fifo; + end = clib_fifo_end (bi->buffer_fifo); + + if (tail + n_alloc <= end) + { + n_added = pg_stream_fill_helper (pg, s, bi, tail, last_tail, n_alloc); + } + else + { + u32 n = clib_min (end - tail, n_alloc); + n_added = pg_stream_fill_helper (pg, s, bi, tail, last_tail, n); + + if (n_added == n && n_alloc > n_added) + { + n_added += pg_stream_fill_helper + (pg, s, bi, start, last_start, n_alloc - n_added); + } + } + + if (PREDICT_FALSE (n_added < n_alloc)) + tail = clib_fifo_advance_tail (bi->buffer_fifo, n_added - n_alloc); + + last_tail = tail; + last_start = start; + + /* Verify that pkts in the fifo are properly allocated */ +#if DPDK == 1 + if (CLIB_DEBUG > 0) + { + u32 *bi0; + vlib_main_t * vm = vlib_get_main(); + clib_fifo_foreach (bi0, bi->buffer_fifo, + ({ + vlib_buffer_t * b; + struct rte_mbuf *mb; + + b = vlib_get_buffer(vm, bi0[0]); + mb = (struct rte_mbuf *)b - 1; + ASSERT(rte_mbuf_refcnt_read(mb) == 1); + })); + } +#endif + } + + return n_in_fifo + n_added; +} + +typedef struct { + u32 stream_index; + + u32 packet_length; + + /* Use pre data for packet data. */ + vlib_buffer_t buffer; +} pg_input_trace_t; + +static u8 * format_pg_input_trace (u8 * s, va_list * va) +{ + vlib_main_t * vm = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + pg_input_trace_t * t = va_arg (*va, pg_input_trace_t *); + pg_main_t * pg = &pg_main; + pg_stream_t * stream; + vlib_node_t * n; + uword indent = format_get_indent (s); + + stream = 0; + if (! 
pool_is_free_index (pg->streams, t->stream_index)) + stream = pool_elt_at_index (pg->streams, t->stream_index); + + if (stream) + s = format (s, "stream %v", pg->streams[t->stream_index].name); + else + s = format (s, "stream %d", t->stream_index); + + s = format (s, ", %d bytes", t->packet_length); + + s = format (s, "\n%U%U", + format_white_space, indent, + format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U", + format_white_space, indent); + + n = 0; + if (stream) + n = vlib_get_node (vm, stream->node_index); + + if (n && n->format_buffer) + s = format (s, "%U", n->format_buffer, + t->buffer.pre_data, + sizeof (t->buffer.pre_data)); + else + s = format (s, "%U", + format_hex_bytes, t->buffer.pre_data, + ARRAY_LEN (t->buffer.pre_data)); + return s; +} + +static void +pg_input_trace (pg_main_t * pg, + vlib_node_runtime_t * node, + pg_stream_t * s, + u32 * buffers, + u32 n_buffers) +{ + vlib_main_t * vm = pg->vlib_main; + u32 * b, n_left, stream_index, next_index; + + n_left = n_buffers; + b = buffers; + stream_index = s - pg->streams; + next_index = s->next_index; + + while (n_left >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + pg_input_trace_t * t0, * t1; + + bi0 = b[0]; + bi1 = b[1]; + b += 2; + n_left -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 1); + vlib_trace_buffer (vm, node, next_index, b1, /* follow_chain */ 1); + + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + + t0->stream_index = stream_index; + t1->stream_index = stream_index; + + t0->packet_length = vlib_buffer_length_in_chain (vm, b0); + t1->packet_length = vlib_buffer_length_in_chain (vm, b1); + + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b1->pre_data)); + + memcpy (t0->buffer.pre_data, b0->data, sizeof (t0->buffer.pre_data)); + memcpy (t1->buffer.pre_data, b1->data, sizeof (t1->buffer.pre_data)); + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + pg_input_trace_t * t0; + + bi0 = b[0]; + b += 1; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 1); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + + t0->stream_index = stream_index; + t0->packet_length = vlib_buffer_length_in_chain (vm, b0); + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data, sizeof (t0->buffer.pre_data)); + } +} + +static uword +pg_generate_packets (vlib_node_runtime_t * node, + pg_main_t * pg, + pg_stream_t * s, + uword n_packets_to_generate) +{ + vlib_main_t * vm = pg->vlib_main; + u32 * to_next, n_this_frame, n_left, n_trace, n_packets_in_fifo; + uword n_packets_generated; + pg_buffer_index_t * bi, * bi0; + + bi0 = s->buffer_indices; + + n_packets_in_fifo = pg_stream_fill (pg, s, n_packets_to_generate); + n_packets_to_generate = clib_min (n_packets_in_fifo, n_packets_to_generate); + n_packets_generated = 0; + + while (n_packets_to_generate > 0) + { + u32 * head, * start, * end; + + vlib_get_next_frame (vm, node, s->next_index, to_next, n_left); + + n_this_frame = n_packets_to_generate; + if (n_this_frame > n_left) + n_this_frame = n_left; + + start = bi0->buffer_fifo; + end = clib_fifo_end (bi0->buffer_fifo); + head = clib_fifo_head (bi0->buffer_fifo); + + if (head + n_this_frame <= end) + vlib_copy_buffers (to_next, head, n_this_frame); + else + { + u32 n = end 
- head; + vlib_copy_buffers (to_next + 0, head, n); + vlib_copy_buffers (to_next + n, start, n_this_frame - n); + } + + vec_foreach (bi, s->buffer_indices) + clib_fifo_advance_head (bi->buffer_fifo, n_this_frame); + + n_trace = vlib_get_trace_count (vm, node); + if (n_trace > 0) + { + u32 n = clib_min (n_trace, n_this_frame); + pg_input_trace (pg, node, s, to_next, n); + vlib_set_trace_count (vm, node, n_trace - n); + } + n_packets_to_generate -= n_this_frame; + n_packets_generated += n_this_frame; + n_left -= n_this_frame; + vlib_put_next_frame (vm, node, s->next_index, n_left); + } + + return n_packets_generated; +} + +static uword +pg_input_stream (vlib_node_runtime_t * node, + pg_main_t * pg, + pg_stream_t * s) +{ + vlib_main_t * vm = pg->vlib_main; + uword n_packets; + f64 time_now, dt; + + if (s->n_packets_limit > 0 + && s->n_packets_generated >= s->n_packets_limit) + { + pg_stream_enable_disable (pg, s, /* want_enabled */ 0); + return 0; + } + + /* Apply rate limit. */ + time_now = vlib_time_now (vm); + if (s->time_last_generate == 0) + s->time_last_generate = time_now; + + dt = time_now - s->time_last_generate; + s->time_last_generate = time_now; + + n_packets = VLIB_FRAME_SIZE; + if (s->rate_packets_per_second > 0) + { + s->packet_accumulator += dt * s->rate_packets_per_second; + n_packets = s->packet_accumulator; + + /* Never allow accumulator to grow if we get behind. */ + s->packet_accumulator -= n_packets; + } + + /* Apply fixed limit. */ + if (s->n_packets_limit > 0 + && s->n_packets_generated + n_packets > s->n_packets_limit) + n_packets = s->n_packets_limit - s->n_packets_generated; + + /* Generate up to one frame's worth of packets. */ + if (n_packets > VLIB_FRAME_SIZE) + n_packets = VLIB_FRAME_SIZE; + + if (n_packets > 0) + n_packets = pg_generate_packets (node, pg, s, n_packets); + + s->n_packets_generated += n_packets; + + return n_packets; +} + +uword +pg_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + uword i; + pg_main_t * pg = &pg_main; + uword n_packets = 0; + + clib_bitmap_foreach (i, pg->enabled_streams, ({ + n_packets += pg_input_stream (node, pg, vec_elt_at_index (pg->streams, i)); + })); + + return n_packets; +} + +VLIB_REGISTER_NODE (pg_input_node) = { + .function = pg_input, + .name = "pg-input", + .type = VLIB_NODE_TYPE_INPUT, + + .format_trace = format_pg_input_trace, + + /* Input node will be left disabled until a stream is active. */ + .state = VLIB_NODE_STATE_DISABLED, +}; diff --git a/vnet/vnet/pg/output.c b/vnet/vnet/pg/output.c new file mode 100644 index 00000000000..cc098da21c6 --- /dev/null +++ b/vnet/vnet/pg/output.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * pg_output.c: packet generator output + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> + +uword +pg_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_buffers = frame->n_vectors; + vlib_buffer_free_no_next (vm, buffers, n_buffers); + return n_buffers; +} diff --git a/vnet/vnet/pg/pg.h b/vnet/vnet/pg/pg.h new file mode 100644 index 00000000000..63bfb18abca --- /dev/null +++ b/vnet/vnet/pg/pg.h @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * pg.h: VLIB packet generator + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_vlib_pg_h +#define included_vlib_pg_h + +#include <vlib/vlib.h> /* for VLIB_N_RX_TX */ +#include <vnet/pg/edit.h> +#include <vppinfra/fifo.h> /* for buffer_fifo */ + +struct pg_main_t; +struct pg_stream_t; + +typedef struct pg_edit_group_t { + /* Edits in this group. */ + pg_edit_t * edits; + + /* Vector of non-fixed edits for this group. */ + pg_edit_t * non_fixed_edits; + + /* Fixed edits for this group. */ + u8 * fixed_packet_data; + u8 * fixed_packet_data_mask; + + /* Byte offset where packet data begins. */ + u32 start_byte_offset; + + /* Number of packet bytes for this edit group. */ + u32 n_packet_bytes; + + /* Function to perform miscellaneous edits (e.g. set IP checksum, ...). */ + void (* edit_function) (struct pg_main_t * pg, + struct pg_stream_t * s, + struct pg_edit_group_t * g, + u32 * buffers, + u32 n_buffers); + + /* Opaque data for edit function's use. */ + uword edit_function_opaque; +} pg_edit_group_t; + +/* Packets are made of multiple buffers chained together. + This struct keeps track of data per-chain index. */ +typedef struct { + /* Vector of buffer edits for this stream and buffer index. */ + pg_edit_t * edits; + + /* Buffers pre-initialized with fixed buffer data for this stream. */ + u32 * buffer_fifo; + + /* Buffer free list for this buffer index in stream. */ + u32 free_list_index; +} pg_buffer_index_t; + +typedef struct pg_stream_t { + /* Stream name. */ + u8 * name; + + u32 flags; + + /* Stream is currently enabled. */ +#define PG_STREAM_FLAGS_IS_ENABLED (1 << 0) +#define PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE (1 << 1) + + /* Edit groups are created by each protocol level (e.g. ethernet, + ip4, tcp, ...). */ + pg_edit_group_t * edit_groups; + + pg_edit_type_t packet_size_edit_type; + + /* Min/max packet size. */ + u32 min_packet_bytes, max_packet_bytes; + + /* Vector of non-fixed edits for this stream. + All fixed edits are performed and placed into fixed_packet_data. */ + pg_edit_t * non_fixed_edits; + + /* Packet data with all fixed edits performed. + All packets in stream are initialized according with this data. + Mask specifies which bits of packet data are covered by fixed edits. */ + u8 * fixed_packet_data, * fixed_packet_data_mask; + + /* Size to use for buffers. 0 means use buffers big enough + for max_packet_bytes. */ + u32 buffer_bytes; + + /* Last packet length if packet size edit type is increment. */ + u32 last_increment_packet_size; + + /* Index into main interface pool for this stream. */ + u32 pg_if_index; + + /* Interface used to mark packets for this stream. May be different + than hw/sw index from pg main interface pool. They will be + different if this stream is being used generate buffers as if + they were received on a non-pg interface. For example, suppose you + are trying to test vlan code and you want to generate buffers that + appear to come from an ethernet interface. */ + u32 sw_if_index[VLIB_N_RX_TX]; + + /* Node where stream's buffers get put. */ + u32 node_index; + + /* Output next index to reach output node from stream input node. */ + u32 next_index; + + /* Number of packets currently generated. */ + u64 n_packets_generated; + + /* Stream is disabled when packet limit is reached. + Zero means no packet limit. */ + u64 n_packets_limit; + + /* Rate for this stream in packets/second. + Zero means unlimited rate. 
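+     The rate is enforced in pg_input_stream(): each dispatch adds
+     dt * rate_packets_per_second to packet_accumulator and generates that
+     many whole packets, capped at VLIB_FRAME_SIZE. For example, at 1e6 pps
+     and dt = 50e-6 seconds, roughly 50 packets are produced per call.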
*/ + f64 rate_packets_per_second; + + f64 time_last_generate; + + f64 packet_accumulator; + + pg_buffer_index_t * buffer_indices; + + u8 ** replay_packet_templates; + u32 current_replay_packet_index; +} pg_stream_t; + +always_inline void +pg_buffer_index_free (pg_buffer_index_t * bi) +{ + vec_free (bi->edits); + clib_fifo_free (bi->buffer_fifo); +} + +always_inline void +pg_edit_group_free (pg_edit_group_t * g) +{ + pg_edit_t * e; + vec_foreach (e, g->edits) + pg_edit_free (e); + vec_free (g->edits); + vec_free (g->fixed_packet_data); + vec_free (g->fixed_packet_data_mask); +} + +always_inline void +pg_stream_free (pg_stream_t * s) +{ + pg_edit_group_t * g; + pg_edit_t * e; + vec_foreach (e, s->non_fixed_edits) + pg_edit_free (e); + vec_free (s->non_fixed_edits); + vec_foreach (g, s->edit_groups) + pg_edit_group_free (g); + vec_free (s->edit_groups); + vec_free (s->fixed_packet_data); + vec_free (s->fixed_packet_data_mask); + vec_free (s->name); + + { + pg_buffer_index_t * bi; + vec_foreach (bi, s->buffer_indices) + pg_buffer_index_free (bi); + vec_free (s->buffer_indices); + } +} + +always_inline int +pg_stream_is_enabled (pg_stream_t * s) +{ return (s->flags & PG_STREAM_FLAGS_IS_ENABLED) != 0; } + +always_inline pg_edit_group_t * +pg_stream_get_group (pg_stream_t * s, u32 group_index) +{ return vec_elt_at_index (s->edit_groups, group_index); } + +always_inline void * +pg_create_edit_group (pg_stream_t * s, + int n_edit_bytes, + int n_packet_bytes, + u32 * group_index) +{ + pg_edit_group_t * g; + int n_edits; + + vec_add2 (s->edit_groups, g, 1); + if (group_index) + *group_index = g - s->edit_groups; + + ASSERT (n_edit_bytes % sizeof (pg_edit_t) == 0); + n_edits = n_edit_bytes / sizeof (pg_edit_t); + vec_resize (g->edits, n_edits); + + g->n_packet_bytes = n_packet_bytes; + + return g->edits; +} + +always_inline void * +pg_add_edits (pg_stream_t * s, int n_edit_bytes, int n_packet_bytes, + u32 group_index) +{ + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + pg_edit_t * e; + int n_edits; + ASSERT (n_edit_bytes % sizeof (pg_edit_t) == 0); + n_edits = n_edit_bytes / sizeof (pg_edit_t); + vec_add2 (g->edits, e, n_edits); + g->n_packet_bytes += n_packet_bytes; + return e; +} + +always_inline void * +pg_get_edit_group (pg_stream_t * s, u32 group_index) +{ + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + return g->edits; +} + +/* Number of bytes for all groups >= given group. */ +always_inline uword +pg_edit_group_n_bytes (pg_stream_t * s, u32 group_index) +{ + pg_edit_group_t * g; + uword n_bytes = 0; + + for (g = s->edit_groups + group_index; g < vec_end (s->edit_groups); g++) + n_bytes += g->n_packet_bytes; + return n_bytes; +} + +always_inline void +pg_free_edit_group (pg_stream_t * s) +{ + uword i = vec_len (s->edit_groups) - 1; + pg_edit_group_t * g = pg_stream_get_group (s, i); + + pg_edit_group_free (g); + memset (g, 0, sizeof (g[0])); + _vec_len (s->edit_groups) = i; +} + +typedef struct { + /* VLIB interface indices. */ + u32 hw_if_index, sw_if_index; + + /* Identifies stream for this interface. */ + u32 stream_index; +} pg_interface_t; + +/* Per VLIB node data. */ +typedef struct { + /* Parser function indexed by node index. */ + unformat_function_t * unformat_edit; +} pg_node_t; + +typedef struct pg_main_t { + /* Back pointer to main structure. */ + vlib_main_t * vlib_main; + + /* Pool of streams. */ + pg_stream_t * streams; + + /* Bitmap indicating which streams are currently enabled. 
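+     Indexed by stream pool index. pg_input walks this bitmap with
+     clib_bitmap_foreach, and the pg-input node is left DISABLED whenever
+     the bitmap is empty.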
*/ + uword * enabled_streams; + + /* Hash mapping name -> stream index. */ + uword * stream_index_by_name; + + /* Vector of interfaces. */ + pg_interface_t * interfaces; + + /* Per VLIB node information. */ + pg_node_t * nodes; + + u32 * free_interfaces; +} pg_main_t; + +/* Global main structure. */ +extern pg_main_t pg_main; + +/* Global node. */ +extern vlib_node_registration_t pg_input_node; + +/* Buffer generator input, output node functions. */ +vlib_node_function_t pg_input, pg_output; + +/* Stream add/delete. */ +void pg_stream_del (pg_main_t * pg, uword index); +void pg_stream_add (pg_main_t * pg, pg_stream_t * s_init); + +/* Enable/disable stream. */ +void pg_stream_enable_disable (pg_main_t * pg, pg_stream_t * s, int is_enable); + +/* Find/create free packet-generator interface index. */ +u32 pg_interface_find_free (pg_main_t * pg, uword stream_index); + +always_inline pg_node_t * +pg_get_node (uword node_index) +{ + pg_main_t * pg = &pg_main; + vec_validate (pg->nodes, node_index); + return pg->nodes + node_index; +} + +void pg_edit_group_get_fixed_packet_data (pg_stream_t * s, + u32 group_index, + void * fixed_packet_data, + void * fixed_packet_data_mask); + +#endif /* included_vlib_pg_h */ diff --git a/vnet/vnet/pg/stream.c b/vnet/vnet/pg/stream.c new file mode 100644 index 00000000000..1dd5624338f --- /dev/null +++ b/vnet/vnet/pg/stream.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * pg_stream.c: packet generator streams + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> + +/* Mark stream active or inactive. 
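+   Typical usage (a sketch; assumes the stream was created earlier with
+   pg_stream_add() and that index is its slot in pg->streams):
+
+     pg_main_t * pg = &pg_main;
+     pg_stream_t * s = pool_elt_at_index (pg->streams, index);
+     pg_stream_enable_disable (pg, s, 1);    // non-zero => enable
+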
*/ +void pg_stream_enable_disable (pg_main_t * pg, pg_stream_t * s, int want_enabled) +{ + vnet_main_t * vnm = vnet_get_main(); + pg_interface_t * pi = vec_elt_at_index (pg->interfaces, s->pg_if_index); + + want_enabled = want_enabled != 0; + + if (pg_stream_is_enabled (s) == want_enabled) + /* No change necessary. */ + return; + + if (want_enabled) + s->n_packets_generated = 0; + + /* Toggle enabled flag. */ + s->flags ^= PG_STREAM_FLAGS_IS_ENABLED; + + ASSERT (! pool_is_free (pg->streams, s)); + + pg->enabled_streams + = clib_bitmap_set (pg->enabled_streams, s - pg->streams, want_enabled); + + vnet_hw_interface_set_flags (vnm, pi->hw_if_index, + (want_enabled + ? VNET_HW_INTERFACE_FLAG_LINK_UP + : 0)); + + vnet_sw_interface_set_flags (vnm, pi->sw_if_index, + (want_enabled + ? VNET_SW_INTERFACE_FLAG_ADMIN_UP + : 0)); + + vlib_node_set_state (pg->vlib_main, + pg_input_node.index, + (clib_bitmap_is_zero (pg->enabled_streams) + ? VLIB_NODE_STATE_DISABLED + : VLIB_NODE_STATE_POLLING)); + + s->packet_accumulator = 0; + s->time_last_generate = 0; +} + +static u8 * format_pg_interface_name (u8 * s, va_list * args) +{ + pg_main_t * pg = &pg_main; + u32 if_index = va_arg (*args, u32); + pg_interface_t * pi; + + pi = vec_elt_at_index (pg->interfaces, if_index); + s = format (s, "pg/stream-%d", pi->stream_index); + + return s; +} + +VNET_DEVICE_CLASS (pg_dev_class,static) = { + .name = "pg", + .tx_function = pg_output, + .format_device_name = format_pg_interface_name, +}; + +static uword pg_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + u16 * h = rewrite; + + if (max_rewrite_bytes < sizeof (h[0])) + return 0; + + h[0] = clib_host_to_net_u16 (l3_type); + return sizeof (h[0]); +} + +VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = { + .name = "Packet generator", + .set_rewrite = pg_set_rewrite, +}; + +u32 pg_interface_find_free (pg_main_t * pg, uword stream_index) +{ + vnet_main_t * vnm = vnet_get_main(); + pg_interface_t * pi; + vnet_hw_interface_t * hi; + u32 i, l; + + if ((l = vec_len (pg->free_interfaces)) > 0) + { + i = pg->free_interfaces[l - 1]; + _vec_len (pg->free_interfaces) = l - 1; + pi = vec_elt_at_index (pg->interfaces, i); + pi->stream_index = stream_index; + } + else + { + i = vec_len (pg->interfaces); + vec_add2 (pg->interfaces, pi, 1); + + pi->stream_index = stream_index; + pi->hw_if_index = vnet_register_interface (vnm, + pg_dev_class.index, i, + pg_interface_class.index, stream_index); + hi = vnet_get_hw_interface (vnm, pi->hw_if_index); + pi->sw_if_index = hi->sw_if_index; + } + + return i; +} + +static void do_edit (pg_stream_t * stream, + pg_edit_group_t * g, + pg_edit_t * e, + uword want_commit) +{ + u32 i, i0, i1, mask, n_bits_left; + u8 * v, * s, * m; + + i0 = e->lsb_bit_offset / BITS (u8); + + /* Make space for edit in value and mask. 
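+     Fixed edits are rendered directly into g->fixed_packet_data and
+     g->fixed_packet_data_mask here; random/increment edits are instead
+     queued on g->non_fixed_edits and applied per packet at generation time.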
*/ + vec_validate (g->fixed_packet_data, i0); + vec_validate (g->fixed_packet_data_mask, i0); + + if (e->type != PG_EDIT_FIXED) + { + switch (e->type) + { + case PG_EDIT_RANDOM: + case PG_EDIT_INCREMENT: + e->last_increment_value = pg_edit_get_value (e, PG_EDIT_LO); + break; + + default: + break; + } + + if (want_commit) + { + ASSERT(e->type != PG_EDIT_INVALID_TYPE); + vec_add1 (g->non_fixed_edits, e[0]); + } + return; + } + + s = g->fixed_packet_data; + m = g->fixed_packet_data_mask; + + n_bits_left = e->n_bits; + i0 = e->lsb_bit_offset / BITS (u8); + i1 = e->lsb_bit_offset % BITS (u8); + + v = e->values[PG_EDIT_LO]; + i = pg_edit_n_alloc_bytes (e) - 1; + + /* Odd low order bits?. */ + if (i1 != 0 && n_bits_left > 0) + { + u32 n = clib_min (n_bits_left, BITS (u8) - i1); + + mask = pow2_mask (n) << i1; + + ASSERT (i0 < vec_len (s)); + ASSERT (i < vec_len (v)); + ASSERT ((v[i] &~ mask) == 0); + + s[i0] |= v[i] & mask; + m[i0] |= mask; + + i0--; + i--; + n_bits_left -= n; + } + + /* Even bytes. */ + while (n_bits_left >= 8) + { + ASSERT (i0 < vec_len (s)); + ASSERT (i < vec_len (v)); + + s[i0] = v[i]; + m[i0] = ~0; + + i0--; + i--; + n_bits_left -= 8; + } + + /* Odd high order bits. */ + if (n_bits_left > 0) + { + mask = pow2_mask (n_bits_left); + + ASSERT (i0 < vec_len (s)); + ASSERT (i < vec_len (v)); + ASSERT ((v[i] &~ mask) == 0); + + s[i0] |= v[i] & mask; + m[i0] |= mask; + } + + if (want_commit) + pg_edit_free (e); +} + +void pg_edit_group_get_fixed_packet_data (pg_stream_t * s, + u32 group_index, + void * packet_data, + void * packet_data_mask) +{ + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + pg_edit_t * e; + + vec_foreach (e, g->edits) + do_edit (s, g, e, /* want_commit */ 0); + + memcpy (packet_data, g->fixed_packet_data, vec_len (g->fixed_packet_data)); + memcpy (packet_data_mask, g->fixed_packet_data_mask, vec_len (g->fixed_packet_data_mask)); +} + +static void perform_fixed_edits (pg_stream_t * s) +{ + pg_edit_group_t * g; + pg_edit_t * e; + word i; + + for (i = vec_len (s->edit_groups) - 1; i >= 0; i--) + { + g = vec_elt_at_index (s->edit_groups, i); + vec_foreach (e, g->edits) + do_edit (s, g, e, /* want_commit */ 1); + + /* All edits have either been performed or added to + g->non_fixed_edits. So, we can delete the vector. */ + vec_free (g->edits); + } + + vec_free (s->fixed_packet_data_mask); + vec_free (s->fixed_packet_data); + vec_foreach (g, s->edit_groups) + { + int i; + g->start_byte_offset = vec_len (s->fixed_packet_data); + + /* Relocate and copy non-fixed edits from group to stream. */ + vec_foreach (e, g->non_fixed_edits) + e->lsb_bit_offset += g->start_byte_offset * BITS (u8); + + for (i = 0; i < vec_len (g->non_fixed_edits); i++) + ASSERT(g->non_fixed_edits[i].type != PG_EDIT_INVALID_TYPE); + + vec_add (s->non_fixed_edits, + g->non_fixed_edits, + vec_len (g->non_fixed_edits)); + vec_free (g->non_fixed_edits); + + vec_add (s->fixed_packet_data, + g->fixed_packet_data, + vec_len (g->fixed_packet_data)); + vec_add (s->fixed_packet_data_mask, + g->fixed_packet_data_mask, + vec_len (g->fixed_packet_data_mask)); + } +} + +void pg_stream_add (pg_main_t * pg, pg_stream_t * s_init) +{ + vlib_main_t * vm = pg->vlib_main; + pg_stream_t * s; + uword * p; + + if (! pg->stream_index_by_name) + pg->stream_index_by_name + = hash_create_vec (0, sizeof (s->name[0]), sizeof (uword)); + + /* Delete any old stream with the same name. 
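+     Re-adding a stream under an existing name therefore replaces the
+     previous definition rather than creating a duplicate.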
*/ + if (s_init->name + && (p = hash_get_mem (pg->stream_index_by_name, s_init->name))) + { + pg_stream_del (pg, p[0]); + } + + pool_get (pg->streams, s); + s[0] = s_init[0]; + + /* Give it a name. */ + if (! s->name) + s->name = format (0, "stream%d", s - pg->streams); + else + s->name = vec_dup (s->name); + + hash_set_mem (pg->stream_index_by_name, s->name, s - pg->streams); + + /* Get fixed part of buffer data. */ + perform_fixed_edits (s); + + /* Determine packet size. */ + switch (s->packet_size_edit_type) + { + case PG_EDIT_INCREMENT: + case PG_EDIT_RANDOM: + if (s->min_packet_bytes == s->max_packet_bytes) + s->packet_size_edit_type = PG_EDIT_FIXED; + break; + + default: + /* Get packet size from fixed edits. */ + s->packet_size_edit_type = PG_EDIT_FIXED; + if (! s->replay_packet_templates) + s->min_packet_bytes = s->max_packet_bytes = vec_len (s->fixed_packet_data); + break; + } + + s->last_increment_packet_size = s->min_packet_bytes; + + { + pg_buffer_index_t * bi; + int n; + + if (! s->buffer_bytes) + s->buffer_bytes = s->max_packet_bytes; + + s->buffer_bytes = vlib_buffer_round_size (s->buffer_bytes); + + n = s->max_packet_bytes / s->buffer_bytes; + n += (s->max_packet_bytes % s->buffer_bytes) != 0; + + vec_resize (s->buffer_indices, n); + + vec_foreach (bi, s->buffer_indices) + bi->free_list_index = vlib_buffer_create_free_list (vm, s->buffer_bytes, + "pg stream %d buffer #%d", + s - pg->streams, + 1 + (bi - s->buffer_indices)); + } + + /* Find an interface to use. */ + s->pg_if_index = pg_interface_find_free (pg, s - pg->streams); + + { + pg_interface_t * pi = vec_elt_at_index (pg->interfaces, s->pg_if_index); + vlib_rx_or_tx_t rx_or_tx; + + vlib_foreach_rx_tx (rx_or_tx) + { + if (s->sw_if_index[rx_or_tx] == ~0) + s->sw_if_index[rx_or_tx] = pi->sw_if_index; + } + } + + /* Connect the graph. */ + s->next_index = vlib_node_add_next (vm, pg_input_node.index, s->node_index); +} + +void pg_stream_del (pg_main_t * pg, uword index) +{ + vlib_main_t * vm = pg->vlib_main; + pg_stream_t * s; + pg_buffer_index_t * bi; + + s = pool_elt_at_index (pg->streams, index); + + pg_stream_enable_disable (pg, s, /* want_enabled */ 0); + vec_add1 (pg->free_interfaces, s->pg_if_index); + hash_unset_mem (pg->stream_index_by_name, s->name); + + vec_foreach (bi, s->buffer_indices) + { + vlib_buffer_delete_free_list (vm, bi->free_list_index); + clib_fifo_free (bi->buffer_fifo); + } + + pg_stream_free (s); + pool_put (pg->streams, s); +} + diff --git a/vnet/vnet/pipeline.h b/vnet/vnet/pipeline.h new file mode 100644 index 00000000000..5a0d4dcc616 --- /dev/null +++ b/vnet/vnet/pipeline.h @@ -0,0 +1,453 @@ +/* + * vnet/pipeline.h: software pipeline + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Usage example. 
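+ *
+ * Each stage is a function of (vlib_main_t *, vlib_node_runtime_t *,
+ * u32 buffer_index). Stages 0 .. NSTAGES-2 return void and typically
+ * prefetch or do partial work; the final stage must be supplied as
+ * last_stage() and must return the next-node index for the buffer.
+ * A minimal sketch of the stage definitions (MY_NEXT_INDEX stands for
+ * whatever disposition the node computes):
+ *
+ *   #define stage0 generic_stage0     // default stage 0, provided below
+ *
+ *   static inline void stage1 (vlib_main_t * vm,
+ *                              vlib_node_runtime_t * node, u32 bi)
+ *   {  // per-packet work on vlib_get_buffer (vm, bi)
+ *   }
+ *
+ *   static inline u32 last_stage (vlib_main_t * vm,
+ *                                 vlib_node_runtime_t * node, u32 bi)
+ *   { return MY_NEXT_INDEX; }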
+ * + * #define NSTAGES 3 or whatever + * + * <Define pipeline stages> + * + * #include <vnet/pipeline.h> + * + * static uword my_node_fn (vlib_main_t * vm, + * vlib_node_runtime_t * node, + * vlib_frame_t * frame) + * { + * return dispatch_pipeline (vm, node, frame); + * } + * + */ + +#ifndef NSTAGES +#error files which #include <vnet/pipeline.h> must define NSTAGES +#endif + +#ifndef STAGE_INLINE +#define STAGE_INLINE inline +#endif + +/* + * A prefetch stride of 2 is quasi-equivalent to doubling the number + * of stages with every other pipeline stage empty. + */ + +/* + * This is a typical first pipeline stage, which prefetches + * buffer metadata and the first line of pkt data. + * To use it: + * #define stage0 generic_stage0 + */ +static STAGE_INLINE void generic_stage0 (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 buffer_index) +{ + /* generic default stage 0 here */ + vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); + vlib_prefetch_buffer_header (b, STORE); + CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE); +} + +#if NSTAGES == 2 + +static STAGE_INLINE uword +dispatch_pipeline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, * to_next, next_index, next0; + int pi, pi_limit; + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + pi_limit = clib_min (n_left_from, n_left_to_next); + + for (pi = 0; pi < NSTAGES-1; pi++) + { + if(pi == pi_limit) + break; + stage0 (vm, node, from[pi]); + } + + for (; pi < pi_limit; pi++) + { + stage0 (vm, node, from[pi]); + to_next[0] = from [pi - 1]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 1]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 1], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + + for (; pi < (pi_limit + (NSTAGES-1)); pi++) + { + if (((pi - 1) >= 0) && ((pi - 1) < pi_limit)) + { + to_next[0] = from [pi - 1]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 1]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 1], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + from += pi_limit; + } + return frame->n_vectors; +} +#endif + +#if NSTAGES == 3 +static STAGE_INLINE uword +dispatch_pipeline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, * to_next, next_index, next0; + int pi, pi_limit; + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + pi_limit = clib_min (n_left_from, n_left_to_next); + + for (pi = 0; pi < NSTAGES-1; pi++) + { + if(pi == pi_limit) + break; + stage0 (vm, node, from[pi]); + if (pi-1 >= 0) + stage1 (vm, node, from[pi-1]); + } + + for (; pi < pi_limit; pi++) + { + stage0 (vm, node, from[pi]); + stage1 (vm, node, from[pi-1]); + to_next[0] = from [pi - 2]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, 
node, from [pi - 2]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 2], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + + + for (; pi < (pi_limit + (NSTAGES-1)); pi++) + { + if (((pi - 1) >= 0) && ((pi - 1) < pi_limit)) + stage1 (vm, node, from[pi-1]); + if (((pi - 2) >= 0) && ((pi - 2) < pi_limit)) + { + to_next[0] = from[pi - 2]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 2]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 2], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + from += pi_limit; + } + return frame->n_vectors; +} +#endif + +#if NSTAGES == 4 +static STAGE_INLINE uword +dispatch_pipeline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, * to_next, next_index, next0; + int pi, pi_limit; + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + pi_limit = clib_min (n_left_from, n_left_to_next); + + for (pi = 0; pi < NSTAGES-1; pi++) + { + if(pi == pi_limit) + break; + stage0 (vm, node, from[pi]); + if (pi-1 >= 0) + stage1 (vm, node, from[pi-1]); + if (pi-2 >= 0) + stage2 (vm, node, from[pi-2]); + } + + for (; pi < pi_limit; pi++) + { + stage0 (vm, node, from[pi]); + stage1 (vm, node, from[pi-1]); + stage2 (vm, node, from[pi-2]); + to_next[0] = from [pi - 3]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 3]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 3], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + + + for (; pi < (pi_limit + (NSTAGES-1)); pi++) + { + if (((pi - 1) >= 0) && ((pi - 1) < pi_limit)) + stage1 (vm, node, from[pi-1]); + if (((pi - 2) >= 0) && ((pi - 2) < pi_limit)) + stage2 (vm, node, from[pi-2]); + if (((pi - 3) >= 0) && ((pi - 3) < pi_limit)) + { + to_next[0] = from[pi - 3]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 3]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 3], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + from += pi_limit; + } + return frame->n_vectors; +} +#endif + + +#if NSTAGES == 5 +static STAGE_INLINE uword +dispatch_pipeline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, * to_next, next_index, next0; + int pi, pi_limit; + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + pi_limit = clib_min (n_left_from, n_left_to_next); + + for (pi = 0; pi < NSTAGES-1; pi++) + { + if(pi == pi_limit) + break; + stage0 (vm, node, from[pi]); + if (pi-1 
>= 0) + stage1 (vm, node, from[pi-1]); + if (pi-2 >= 0) + stage2 (vm, node, from[pi-2]); + if (pi-3 >= 0) + stage3 (vm, node, from[pi-3]); + } + + for (; pi < pi_limit; pi++) + { + stage0 (vm, node, from[pi]); + stage1 (vm, node, from[pi-1]); + stage2 (vm, node, from[pi-2]); + stage3 (vm, node, from[pi-3]); + to_next[0] = from [pi - 4]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 4]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 4], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + + + for (; pi < (pi_limit + (NSTAGES-1)); pi++) + { + if (((pi - 1) >= 0) && ((pi - 1) < pi_limit)) + stage1 (vm, node, from[pi-1]); + if (((pi - 2) >= 0) && ((pi - 2) < pi_limit)) + stage2 (vm, node, from[pi - 2]); + if (((pi - 3) >= 0) && ((pi - 3) < pi_limit)) + stage3 (vm, node, from[pi - 3]); + if (((pi - 4) >= 0) && ((pi - 4) < pi_limit)) + { + to_next[0] = from[pi - 4]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 4]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 4], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + from += pi_limit; + } + return frame->n_vectors; +} +#endif + +#if NSTAGES == 6 +static STAGE_INLINE uword +dispatch_pipeline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, * to_next, next_index, next0; + int pi, pi_limit; + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + pi_limit = clib_min (n_left_from, n_left_to_next); + + for (pi = 0; pi < NSTAGES-1; pi++) + { + if(pi == pi_limit) + break; + stage0 (vm, node, from[pi]); + if (pi-1 >= 0) + stage1 (vm, node, from[pi-1]); + if (pi-2 >= 0) + stage2 (vm, node, from[pi-2]); + if (pi-3 >= 0) + stage3 (vm, node, from[pi-3]); + if (pi-4 >= 0) + stage4 (vm, node, from[pi-4]); + } + + for (; pi < pi_limit; pi++) + { + stage0 (vm, node, from[pi]); + stage1 (vm, node, from[pi-1]); + stage2 (vm, node, from[pi-2]); + stage3 (vm, node, from[pi-3]); + stage4 (vm, node, from[pi-4]); + to_next[0] = from [pi - 5]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 5]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 5], next0); + n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + + + for (; pi < (pi_limit + (NSTAGES-1)); pi++) + { + if (((pi - 1) >= 0) && ((pi - 1) < pi_limit)) + stage1 (vm, node, from[pi-1]); + if (((pi - 2) >= 0) && ((pi - 2) < pi_limit)) + stage2 (vm, node, from[pi - 2]); + if (((pi - 3) >= 0) && ((pi - 3) < pi_limit)) + stage3 (vm, node, from[pi - 3]); + if (((pi - 4) >= 0) && ((pi - 4) < pi_limit)) + stage4 (vm, node, from[pi - 4]); + if (((pi - 5) >= 0) && ((pi - 5) < pi_limit)) + { + to_next[0] = from[pi - 5]; + to_next++; + n_left_to_next--; + next0 = last_stage (vm, node, from [pi - 5]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + from[pi - 5], next0); + 
n_left_from--; + if ((int) n_left_to_next < 0 && n_left_from > 0) + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + from += pi_limit; + } + return frame->n_vectors; +} +#endif diff --git a/vnet/vnet/plugin/p1.c b/vnet/vnet/plugin/p1.c new file mode 100644 index 00000000000..6ede7938ee9 --- /dev/null +++ b/vnet/vnet/plugin/p1.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * This file and in fact the entire directory shouldn't even exist. + * Vnet ought to be a dynamic library. + + * Unfortunately, various things malfunction when we try to go there. + * Plugin DLL's end up with their own copies of critical + * data structures. No one of these problems would be tough to fix, + * but there are quite a number of them. + */ + +/* + * Make certain that plugin .dll's which reference the following functions + * can find them... + */ + +#define foreach_plugin_reference \ +_(unformat_vnet_hw_interface) \ +_(unformat_vnet_sw_interface) \ +_(vnet_hw_interface_rx_redirect_to_node) \ +_(vnet_config_add_feature) \ +_(vnet_config_del_feature) \ +_(vnet_get_main) \ +_(_vlib_init_function_l2_init) \ +_(_vlib_init_function_pg_init) \ +_(_vlib_init_function_ip_main_init) \ +_(_vlib_init_function_ethernet_init) \ +_(_vlib_init_function_ethernet_arp_init) \ +_(l2input_intf_bitmap_enable) \ +_(ip4_main) \ +_(ip6_main) \ +_(format_ip4_address) \ +_(unformat_ip4_address) \ +_(ip4_address_compare) \ +_(ip6_address_compare) \ +_(format_ip6_address) \ +_(format_ip6_address_and_length) \ +_(udp_register_dst_port) \ +_(ethernet_register_input_type) \ +_(ethernet_set_flags) \ +_(vnet_register_sr_app_callback) \ +_(format_ip6_sr_header) \ +_(format_ip6_address) \ +_(unformat_ip6_address) \ +_(ip6_main) \ +_(find_ip6_fib_by_table_index_or_id) \ +_(format_ethernet_address) \ +_(unformat_ethernet_address) \ +_(unformat_ethernet_interface) \ +_(ethernet_register_l2_input) \ +_(ethernet_register_l3_redirect) \ +_(unformat_pg_payload) \ +_(format_ip4_address_and_length) \ +_(ip_incremental_checksum) \ +_(ethernet_sw_interface_set_l2_mode) \ +_(vnet_create_loopback_interface) \ +_(ethernet_set_rx_redirect) \ +_(ethernet_set_flags) \ +_(ethernet_get_main) \ +_(ethernet_get_interface) \ +_(vnet_hw_interface_set_flags) \ +_(vnet_sw_interface_set_flags) \ +_(vnet_create_sw_interface) \ +_(vnet_delete_sw_interface) \ +_(vnet_get_main) \ +_(pg_stream_add) \ +_(pg_stream_del) \ +_(pg_stream_enable_disable) \ +_(pg_main) + +#if DPDK > 0 +#define foreach_dpdk_plugin_reference \ +_(dpdk_set_next_node) \ +_(dpdk_worker_thread) \ +_(dpdk_io_thread) \ +_(dpdk_frame_queue_dequeue) \ +_(vlib_get_handoff_queue_elt) \ +_(dpdk_get_handoff_node_index) \ +_(dpdk_set_flowcontrol_callback) \ +_(dpdk_interface_tx_vector) \ +_(rte_calloc) \ +_(rte_free) \ +_(rte_malloc) \ +_(post_sw_interface_set_flags) \ +_(dpdk_get_admin_up_down_in_progress) \ +_(efd_config) 
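+
+/*
+ * Each _(sym) entry in the lists above is expanded twice further below:
+ * once as an extern declaration and once as a table slot. For example,
+ * _(pg_stream_add) becomes
+ *
+ *   void pg_stream_add (void);     <- forces the symbol to be linked in
+ *   &pg_stream_add,                <- slot in vnet_library_plugin_references
+ *
+ * which keeps these symbols in the image for plugin .dll's to resolve.
+ */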
+#else +#define foreach_dpdk_plugin_reference +#endif + +#define _(a) void a (void); +foreach_plugin_reference +foreach_dpdk_plugin_reference +#undef _ + +void *vnet_library_plugin_references[] = + { +#define _(a) &a, + foreach_plugin_reference + foreach_dpdk_plugin_reference +#undef _ + }; + +void vnet_library_plugin_reference(void) { } diff --git a/vnet/vnet/plugin/plugin.h b/vnet/vnet/plugin/plugin.h new file mode 100644 index 00000000000..a14a5932b50 --- /dev/null +++ b/vnet/vnet/plugin/plugin.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_plugin_h +#define included_vnet_plugin_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/error.h> + +/* Pointers to Genuine Vnet data structures handed to plugin .dll's */ +typedef struct { + vnet_main_t * vnet_main; + ethernet_main_t * ethernet_main; +} vnet_plugin_handoff_t; + +void * vnet_get_handoff_structure (void); + +#endif /* included_vnet_plugin_h */ diff --git a/vnet/vnet/policer/fix_types.h b/vnet/vnet/policer/fix_types.h new file mode 100644 index 00000000000..cbb79e049fc --- /dev/null +++ b/vnet/vnet/policer/fix_types.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_fix_types_h__ +#define __included_fix_types_h__ + +/* deal with various imported type choices */ +#define cerrno int +#define trans_layer_rc int +#define EOK 0 +#define CERR_IS_NOTOK(a) (a != EOK) +#define PACKED +#define OK_pushHW EOK +#define Not_OK (-1) + +typedef unsigned char uint8_t; +typedef unsigned short int uint16_t; +typedef unsigned int uint32_t; + +#endif /* __included_fix_types_h__ */ diff --git a/vnet/vnet/policer/node_funcs.c b/vnet/vnet/policer/node_funcs.c new file mode 100644 index 00000000000..3badcfdfd1f --- /dev/null +++ b/vnet/vnet/policer/node_funcs.c @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/policer/policer.h> + +/* Dispatch functions meant to be instantiated elsewhere */ + +typedef struct { + u32 next_index; + u32 sw_if_index; + u32 policer_index; +} vnet_policer_trace_t; + +/* packet trace format function */ +static u8 * format_policer_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + vnet_policer_trace_t * t = va_arg (*args, vnet_policer_trace_t *); + + s = format (s, "VNET_POLICER: sw_if_index %d policer_index %d next %d", + t->sw_if_index, t->policer_index, t->next_index); + return s; +} + +#define foreach_vnet_policer_error \ +_(TRANSMIT, "Packets Transmitted") \ +_(DROP, "Packets Dropped") + +typedef enum { +#define _(sym,str) VNET_POLICER_ERROR_##sym, + foreach_vnet_policer_error +#undef _ + VNET_POLICER_N_ERROR, +} vnet_policer_error_t; + +static char * vnet_policer_error_strings[] = { +#define _(sym,string) string, + foreach_vnet_policer_error +#undef _ +}; + +static inline +uword vnet_policer_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vnet_policer_index_t which) +{ + u32 n_left_from, * from, * to_next; + vnet_policer_next_t next_index; + vnet_policer_main_t * pm = &vnet_policer_main; + u64 time_in_policer_periods; + u32 transmitted = 0; + + time_in_policer_periods = + clib_cpu_time_now() >> POLICER_TICKS_PER_PERIOD_SHIFT; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + u32 pi0, pi1; + u32 len0, len1; + u32 col0, col1; + policer_read_response_type_st * pol0, * pol1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * b2, * b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, LOAD); + vlib_prefetch_buffer_header (b3, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = VNET_POLICER_NEXT_TRANSMIT; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + next1 = VNET_POLICER_NEXT_TRANSMIT; + + + if (which == VNET_POLICER_INDEX_BY_SW_IF_INDEX) + { + pi0 = pm->policer_index_by_sw_if_index[sw_if_index0]; + pi1 = pm->policer_index_by_sw_if_index[sw_if_index1]; + } + + if (which == VNET_POLICER_INDEX_BY_OPAQUE) + { + pi0 = vnet_buffer(b0)->policer.index; + pi1 = vnet_buffer(b1)->policer.index; + } + + if (which == VNET_POLICER_INDEX_BY_EITHER) + { + pi0 = vnet_buffer(b0)->policer.index; + pi0 = (pi0 != ~0) ? 
pi0 : + pm->policer_index_by_sw_if_index [sw_if_index0]; + pi1 = vnet_buffer(b1)->policer.index; + pi1 = (pi1 != ~0) ? pi1 : + pm->policer_index_by_sw_if_index [sw_if_index1]; + } + + len0 = vlib_buffer_length_in_chain (vm, b0); + pol0 = &pm->policers [pi0]; + col0 = vnet_police_packet (pol0, len0, + POLICE_CONFORM /* no chaining */, + time_in_policer_periods); + + len1 = vlib_buffer_length_in_chain (vm, b1); + pol1 = &pm->policers [pi1]; + col1 = vnet_police_packet (pol1, len1, + POLICE_CONFORM /* no chaining */, + time_in_policer_periods); + + if (PREDICT_FALSE(col0 > 0)) + { + next0 = VNET_POLICER_NEXT_DROP; + b0->error = node->errors[VNET_POLICER_ERROR_DROP]; + } + else + transmitted++; + + if (PREDICT_FALSE(col1 > 0)) + { + next1 = VNET_POLICER_NEXT_DROP; + b1->error = node->errors[VNET_POLICER_ERROR_DROP]; + } + else + transmitted++; + + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + vnet_policer_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + vnet_policer_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + u32 pi0; + u32 len0; + u32 col0; + policer_read_response_type_st * pol0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = VNET_POLICER_NEXT_TRANSMIT; + + if (which == VNET_POLICER_INDEX_BY_SW_IF_INDEX) + pi0 = pm->policer_index_by_sw_if_index[sw_if_index0]; + + if (which == VNET_POLICER_INDEX_BY_OPAQUE) + pi0 = vnet_buffer(b0)->policer.index; + + if (which == VNET_POLICER_INDEX_BY_EITHER) + { + pi0 = vnet_buffer(b0)->policer.index; + pi0 = (pi0 != ~0) ? 
pi0 : + pm->policer_index_by_sw_if_index [sw_if_index0]; + } + + len0 = vlib_buffer_length_in_chain (vm, b0); + pol0 = &pm->policers [pi0]; + col0 = vnet_police_packet (pol0, len0, + POLICE_CONFORM /* no chaining */, + time_in_policer_periods); + + if (PREDICT_FALSE(col0 > 0)) + { + next0 = VNET_POLICER_NEXT_DROP; + b0->error = node->errors[VNET_POLICER_ERROR_DROP]; + } + else + { + transmitted++; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + vnet_policer_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->policer_index = pi0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + VNET_POLICER_ERROR_TRANSMIT, + transmitted); + return frame->n_vectors; +} + +uword vnet_policer_by_sw_if_index (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return vnet_policer_inline (vm, node, frame, + VNET_POLICER_INDEX_BY_SW_IF_INDEX); +} + +uword vnet_policer_by_opaque (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return vnet_policer_inline (vm, node, frame, + VNET_POLICER_INDEX_BY_OPAQUE); +} + +uword vnet_policer_by_either (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return vnet_policer_inline (vm, node, frame, + VNET_POLICER_INDEX_BY_EITHER); +} + +void vnet_policer_node_funcs_reference (void) { } + + +#define TEST_CODE 1 + +#ifdef TEST_CODE + +VLIB_REGISTER_NODE (policer_by_sw_if_index_node, static) = { + .function = vnet_policer_by_sw_if_index, + .name = "policer-by-sw-if-index", + .vector_size = sizeof (u32), + .format_trace = format_policer_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(vnet_policer_error_strings), + .error_strings = vnet_policer_error_strings, + + .n_next_nodes = VNET_POLICER_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [VNET_POLICER_NEXT_TRANSMIT] = "ethernet-input", + [VNET_POLICER_NEXT_DROP] = "error-drop", + }, +}; + + +int test_policer_add_del (u32 rx_sw_if_index, u8 *config_name, + int is_add) +{ + vnet_policer_main_t * pm = &vnet_policer_main; + policer_read_response_type_st * template; + policer_read_response_type_st * policer; + vnet_hw_interface_t * rxhi; + uword * p; + + rxhi = vnet_get_sup_hw_interface (pm->vnet_main, rx_sw_if_index); + + /* Make sure caller didn't pass a vlan subif, etc. 
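+     vnet_get_sup_hw_interface() returns the supporting (parent) hardware
+     interface, so for a sub-interface its sw_if_index will not match the
+     one passed in and the request is rejected below.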
*/ + if (rxhi->sw_if_index != rx_sw_if_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (is_add) + { + + p = hash_get_mem (pm->policer_config_by_name, config_name); + + if (p == 0) + return -2; + + template = pool_elt_at_index (pm->policer_templates, p[0]); + + vnet_hw_interface_rx_redirect_to_node + (pm->vnet_main, + rxhi->hw_if_index, + policer_by_sw_if_index_node.index); + + pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES); + + policer[0] = template[0]; + + vec_validate (pm->policer_index_by_sw_if_index, rx_sw_if_index); + pm->policer_index_by_sw_if_index[rx_sw_if_index] + = policer - pm->policers; + } + else + { + u32 pi; + vnet_hw_interface_rx_redirect_to_node (pm->vnet_main, + rxhi->hw_if_index, + ~0 /* disable */); + + pi = pm->policer_index_by_sw_if_index[rx_sw_if_index]; + pm->policer_index_by_sw_if_index[rx_sw_if_index] = ~0; + pool_put_index (pm->policers, pi); + } + + return 0; +} + +static clib_error_t * +test_policer_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_policer_main_t * pm = &vnet_policer_main; + unformat_input_t _line_input, * line_input = &_line_input; + u32 rx_sw_if_index; + int rv; + u8 * config_name = 0; + int rx_set = 0; + int is_add = 1; + int is_show = 0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "intfc %U", unformat_vnet_sw_interface, + pm->vnet_main, &rx_sw_if_index)) + rx_set = 1; + else if (unformat (line_input, "show")) + is_show=1; + else if (unformat (line_input, "policer %s", &config_name)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else break; + } + + if (rx_set == 0) + return clib_error_return (0, "interface not set"); + + if (is_show) + { + u32 pi = pm->policer_index_by_sw_if_index[rx_sw_if_index]; + policer_read_response_type_st * policer; + policer = pool_elt_at_index (pm->policers, pi); + + vlib_cli_output (vm, "%U", format_policer_instance, policer); + return 0; + } + + if (is_add && config_name == 0) + { + return clib_error_return (0, "policer config name required"); + } + + rv = test_policer_add_del (rx_sw_if_index, config_name, is_add); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return + (0, "WARNING: vnet_vnet_policer_add_del returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (test_patch_command, static) = { + .path = "test policer", + .short_help = + "intfc <intfc> policer <policer-config-name> [del]", + .function = test_policer_command_fn, +}; + + +#endif /* TEST_CODE */ diff --git a/vnet/vnet/policer/police.h b/vnet/vnet/policer/police.h new file mode 100644 index 00000000000..d2e369c655f --- /dev/null +++ b/vnet/vnet/policer/police.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __POLICE_H__ +#define __POLICE_H__ + +typedef enum { + POLICE_CONFORM = 0, + POLICE_EXCEED = 1, + POLICE_VIOLATE = 2, +} policer_result_e; + +// This is the hardware representation of the policer. +// To be multithread-safe, the policer is accessed through a spin-lock +// on the lock field. (For a policer update operation, 24B needs to be +// modified and this would be a challenge to do with atomic instructions.) +// The structure is padded so that no other data is put into the same +// 64B cache-line. This reduces cache-thrashing between threads. +// +// A note on scale: +// The HW TSC tick is roughly one CPU clock cycle. +// This is shifted to create a larger period, with a goal to be around 50usec. +// The period time will vary based on CPU clock speed. +// CPU speeds of 1Ghz to 8Ghz are targeted. +// The shift amount is a constant 17 bits, resulting in a period between +// 16usec (8Ghz CPU) and 131usec (1Ghz CPU). +// The tokens_per_period computation takes into account the clock speed. +// +// The 32-bit bucket/limit supports about 850ms of burst on a 40GE port, +// or 340ms on a 100GE port. If a larger burst is configured, then the +// programmed value is simply capped at 2^32-1. If we needed to support +// more than that, the bucket and limit fields could be expanded. +// +// tokens_per_period should be > 1000 to support 0.1% granularity. +// To support lower rates (which would not meet this requirement), the packet +// length, bucket, and limit values can be scaled. The scale is a power of 2 +// so the multiplication can be implemented as a shift. The control plane +// computes the shift amount to be the largest possible that still supports the +// burst size. This makes the rate accuracy as high as possible. +// +// The 64-bit last_update_time supports a 4Ghz CPU without rollover for 100 years +// +// The lock field should be used for a spin-lock on the struct. + +#define POLICER_TICKS_PER_PERIOD_SHIFT 17 +#define POLICER_TICKS_PER_PERIOD (1 << POLICER_TICKS_PER_PERIOD_SHIFT) + +typedef struct { + + uint32_t lock; // for exclusive access to the struct + + uint32_t single_rate; // 1 = single rate policer, 0 = two rate policer + uint32_t color_aware; // for hierarchical policing + uint32_t scale; // power-of-2 shift amount for lower rates + uint32_t pad[2]; + + // Fields are marked as 2R if they are only used for a 2-rate policer, + // and MOD if they are modified as part of the update operation. + // 1 token = 1 byte. + + uint32_t cir_tokens_per_period; // # of tokens for each period + uint32_t pir_tokens_per_period; // 2R + + uint32_t current_limit; + uint32_t current_bucket; // MOD + uint32_t extended_limit; + uint32_t extended_bucket; // MOD + + uint64_t last_update_time; // MOD + uint64_t pad64; + +} policer_read_response_type_st; + +static inline policer_result_e +vnet_police_packet (policer_read_response_type_st *policer, + uint32_t packet_length, + policer_result_e packet_color, + uint64_t time) +{ + uint64_t n_periods; + uint64_t current_tokens, extended_tokens; + policer_result_e result; + + // Scale packet length to support a wide range of speeds + packet_length = packet_length << policer->scale; + + // Compute the number of policer periods that have passed since the last + // operation. + n_periods = time - policer->last_update_time; + policer->last_update_time = time; + + // Since there is no background last-update-time adjustment, n_periods + // could grow large if the policer is idle for a long time.
This could + // cause a 64-bit overflow when computing tokens_per_period * num_periods. + // It will overflow if log2(n_periods) + log2(tokens_per_period) > 64. + // + // To mitigate this, the policer configuration algorithm insures that + // tokens_per_period is less than 2^22, i.e. this is a 22 bit value not + // a 32-bit value. Thus overflow will only occur if n_periods > 64-22 or + // 42. 2^42 min-sized periods is 16us * 2^42, or 2 years. So this can + // rarely occur. If overflow does happen, the only effect will be that + // fewer tokens than the max burst will be added to the bucket for this + // packet. This constraint on tokens_per_period lets the ucode omit + // code to dynamically check for or prevent the overflow. + + if (policer->single_rate) { + + // Compute number of tokens for this time period + current_tokens = policer->current_bucket + n_periods * policer->cir_tokens_per_period; + if (current_tokens > policer->current_limit) { + current_tokens = policer->current_limit; + } + + extended_tokens = policer->extended_bucket + n_periods * policer->cir_tokens_per_period; + if (extended_tokens > policer->extended_limit) { + extended_tokens = policer->extended_limit; + } + + // Determine color + + if ((!policer->color_aware || (packet_color == POLICE_CONFORM)) && (current_tokens >= packet_length)) { + policer->current_bucket = current_tokens - packet_length; + policer->extended_bucket = extended_tokens - packet_length; + result = POLICE_CONFORM; + } else if ((!policer->color_aware || (packet_color != POLICE_VIOLATE)) && (extended_tokens >= packet_length)) { + policer->current_bucket = current_tokens; + policer->extended_bucket = extended_tokens - packet_length; + result = POLICE_EXCEED; + } else { + policer->current_bucket = current_tokens; + policer->extended_bucket = extended_tokens; + result = POLICE_VIOLATE; + } + + } else { + // Two-rate policer + + // Compute number of tokens for this time period + current_tokens = policer->current_bucket + n_periods * policer->cir_tokens_per_period; + extended_tokens = policer->extended_bucket + n_periods * policer->pir_tokens_per_period; + if (current_tokens > policer->current_limit) { + current_tokens = policer->current_limit; + } + if (extended_tokens > policer->extended_limit) { + extended_tokens = policer->extended_limit; + } + + // Determine color + + if ((policer->color_aware && (packet_color == POLICE_VIOLATE)) || (extended_tokens < packet_length)) { + policer->current_bucket = current_tokens; + policer->extended_bucket = extended_tokens; + result = POLICE_VIOLATE; + } else if ((policer->color_aware && (packet_color == POLICE_EXCEED)) || (current_tokens < packet_length)) { + policer->current_bucket = current_tokens; + policer->extended_bucket = extended_tokens - packet_length; + result = POLICE_EXCEED; + } else { + policer->current_bucket = current_tokens - packet_length; + policer->extended_bucket = extended_tokens - packet_length; + result = POLICE_CONFORM; + } + } + return result; +} + +#endif // __POLICE_H__ diff --git a/vnet/vnet/policer/policer.c b/vnet/vnet/policer/policer.c new file mode 100644 index 00000000000..310c5f59f20 --- /dev/null +++ b/vnet/vnet/policer/policer.c @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/policer/policer.h> + +u8 * format_policer_instance (u8 * s, va_list * va) +{ + policer_read_response_type_st * i + = va_arg (*va, policer_read_response_type_st *); + + s = format (s, "policer at %llx: %s rate, %s color-aware\n", + i, i->single_rate ? "single" : "dual", + i->color_aware ? "is" : "not"); + s = format (s, "cir %u tok/period, pir %u tok/period, scale %u\n", + i->cir_tokens_per_period, i->pir_tokens_per_period, + i->scale); + s = format (s, "cur lim %u, cur bkt %u, ext lim %u, ext bkt %u\n", + i->current_limit, + i->current_bucket, + i->extended_limit, + i->extended_bucket); + s = format (s, "last update %llu\n", i->last_update_time); + return s; +} + +static u8 * format_policer_round_type (u8 * s, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (c->rnd_type == SSE2_QOS_ROUND_TO_CLOSEST) + s = format(s, "closest"); + else if (c->rnd_type == SSE2_QOS_ROUND_TO_UP) + s = format (s, "up"); + else if (c->rnd_type == SSE2_QOS_ROUND_TO_DOWN) + s = format (s, "down"); + else + s = format (s, "ILLEGAL"); + return s; +} + + +static u8 * format_policer_rate_type (u8 * s, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (c->rate_type == SSE2_QOS_RATE_KBPS) + s = format (s, "kbps"); + else if (c->rate_type == SSE2_QOS_RATE_PPS) + s = format(s, "pps"); + else + s = format (s, "ILLEGAL"); + return s; +} + +static u8 * format_policer_type (u8 * s, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (c->rfc == SSE2_QOS_POLICER_TYPE_1R2C) + s = format (s, "1r2c"); + + else if (c->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) + s = format (s, "1r3c"); + + else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) + s = format (s, "2r3c-2698"); + + else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) + s = format (s, "2r3c-4115"); + + else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1) + s = format (s, "2r3c-mef5cf1"); + else + s = format (s, "ILLEGAL"); + return s; +} + +u8 * format_policer_config (u8 * s, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + s = format (s, "type %U cir %u eir %u cb %u eb %u\n", + format_policer_type, c, + c->rb.kbps.cir_kbps, + c->rb.kbps.eir_kbps, + c->rb.kbps.cb_bytes, + c->rb.kbps.eb_bytes); + s = format (s, "rate type %U, round type %U\n", + format_policer_rate_type, c, + format_policer_round_type, c); + return s; +} + +static uword +unformat_policer_type (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (!unformat (input, "type")) + return 0; + + if (unformat (input, "1r2c")) + c->rfc = SSE2_QOS_POLICER_TYPE_1R2C; + else if (unformat (input, "1r3c")) + c->rfc = SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697; + else if (unformat (input, "2r3c-2698")) + c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698; + else if (unformat (input, "2r3c-4115")) + c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115; + else if (unformat (input, "2r3c-mef5cf1")) + 
c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1; + else + return 0; + return 1; +} + +static uword +unformat_policer_round_type (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (!unformat(input, "round")) + return 0; + + if (unformat(input, "closest")) + c->rnd_type = SSE2_QOS_ROUND_TO_CLOSEST; + else if (unformat (input, "up")) + c->rnd_type = SSE2_QOS_ROUND_TO_UP; + else if (unformat (input, "down")) + c->rnd_type = SSE2_QOS_ROUND_TO_DOWN; + else + return 0; + return 1; +} + +static uword +unformat_policer_rate_type (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (!unformat(input, "rate")) + return 0; + + if (unformat (input, "kbps")) + c->rate_type = SSE2_QOS_RATE_KBPS; + else if (unformat(input, "pps")) + c->rate_type = SSE2_QOS_RATE_PPS; + else + return 0; + return 1; +} + +static uword +unformat_policer_cir (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "cir %u", &c->rb.kbps.cir_kbps)) + return 1; + return 0; +} + +static uword +unformat_policer_eir (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "eir %u", &c->rb.kbps.eir_kbps)) + return 1; + return 0; +} + +static uword +unformat_policer_cb (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "cb %u", &c->rb.kbps.cb_bytes)) + return 1; + return 0; +} + +static uword +unformat_policer_eb (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st * c + = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "eb %u", &c->rb.kbps.eb_bytes)) + return 1; + return 0; +} + + +#define foreach_config_param \ +_(eb) \ +_(cb) \ +_(eir) \ +_(cir) \ +_(rate_type) \ +_(round_type) \ +_(type) + +static clib_error_t * +configure_policer_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + sse2_qos_pol_cfg_params_st c; + policer_read_response_type_st test_policer; + unformat_input_t _line_input, * line_input = &_line_input; + int is_add = 1; + int rv; + u8 * name = 0; + uword * p; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + memset (&c, 0, sizeof (c)); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "del")) + is_add = 0; + else if (unformat(line_input, "name %s", &name)) + ; + +#define _(a) else if (unformat (line_input, "%U", unformat_policer_##a, &c)) ; + foreach_config_param +#undef _ + + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (is_add == 0) + { + p = hash_get_mem (pm->policer_config_by_name, name); + if (p == 0) + { + vec_free(name); + return clib_error_return (0, "No such policer configuration"); + } + hash_unset_mem (pm->policer_config_by_name, name); + vec_free(name); + return 0; + } + + /* Vet the configuration before adding it to the table */ + rv = sse2_pol_logical_2_physical (&c, &test_policer); + + if (rv == 0) + { + policer_read_response_type_st *pp; + sse2_qos_pol_cfg_params_st *cp; + + pool_get (pm->configs, cp); + pool_get (pm->policer_templates, pp); + + ASSERT (cp - pm->configs == pp - pm->policer_templates); + + memcpy (cp, &c, sizeof (*cp)); + memcpy (pp, &test_policer, sizeof (*pp)); + + hash_set_mem (pm->policer_config_by_name, name, cp - pm->configs); + } + else + { + vec_free (name); + return clib_error_return (0, "Config failed sanity check"); + } + + return 0; +} + +VLIB_CLI_COMMAND (configure_policer_command, static) = { + .path = "configure policer", + .short_help = "configure policer name <name> <params> ", + .function = configure_policer_command_fn, +}; + + +static clib_error_t * +show_policer_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + hash_pair_t * p; + u32 pool_index; + u8 * match_name = 0; + u8 * name; + sse2_qos_pol_cfg_params_st *config; + policer_read_response_type_st *templ; + + (void) unformat (input, "name %s", &match_name); + + hash_foreach_pair (p, pm->policer_config_by_name, + ({ + name = (u8 *) p->key; + if (match_name == 0 || !strcmp((char *) name, (char *) match_name)) + { + pool_index = p->value[0]; + config = pool_elt_at_index (pm->configs, pool_index); + templ = pool_elt_at_index (pm->policer_templates, pool_index); + vlib_cli_output (vm, "Name \"%s\" %U ", + name, format_policer_config, config); + vlib_cli_output (vm, "Template %U", + format_policer_instance, templ); + vlib_cli_output (vm, "-----------"); + } + })); + return 0; +} + + +VLIB_CLI_COMMAND (show_policer_command, static) = { + .path = "show policer", + .short_help = "show policer [name]", + .function = show_policer_command_fn, +}; + +clib_error_t *policer_init (vlib_main_t * vm) +{ + vnet_policer_main_t * pm = &vnet_policer_main; + void vnet_policer_node_funcs_reference(void); + + vnet_policer_node_funcs_reference(); + + pm->vlib_main = vm; + pm->vnet_main = vnet_get_main(); + + pm->policer_config_by_name = hash_create_string (0, sizeof (uword)); + return 0; +} + +VLIB_INIT_FUNCTION(policer_init); + + diff --git a/vnet/vnet/policer/policer.h b/vnet/vnet/policer/policer.h new file mode 100644 index 00000000000..f91521e2268 --- /dev/null +++ b/vnet/vnet/policer/policer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
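Putting the debug CLI above together, an illustrative session (the numbers are example values only; they have to pass the sanity check performed by sse2_pol_logical_2_physical before the configuration is stored):

  configure policer name p1 type 2r3c-2698 cir 100000 eir 200000 cb 1250000 eb 2500000 rate kbps round closest
  show policer name p1
  configure policer name p1 del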
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_policer_h__ +#define __included_policer_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#include <vnet/policer/xlate.h> +#include <vnet/policer/police.h> + +typedef struct { + /* policer pool, aligned */ + policer_read_response_type_st * policers; + + /* config + template h/w policer instance parallel pools */ + sse2_qos_pol_cfg_params_st * configs; + policer_read_response_type_st * policer_templates; + + /* Config by name hash */ + uword * policer_config_by_name; + + /* Policer by sw_if_index vector */ + u32 * policer_index_by_sw_if_index; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} vnet_policer_main_t; + +vnet_policer_main_t vnet_policer_main; + +typedef enum { + VNET_POLICER_INDEX_BY_SW_IF_INDEX, + VNET_POLICER_INDEX_BY_OPAQUE, + VNET_POLICER_INDEX_BY_EITHER, +} vnet_policer_index_t; + +typedef +enum { + VNET_POLICER_NEXT_TRANSMIT, + VNET_POLICER_NEXT_DROP, + VNET_POLICER_N_NEXT, +} vnet_policer_next_t; + +u8 * format_policer_instance (u8 * s, va_list * va); + +#endif /* __included_policer_h__ */ diff --git a/vnet/vnet/policer/xlate.c b/vnet/vnet/policer/xlate.c new file mode 100644 index 00000000000..761e8214413 --- /dev/null +++ b/vnet/vnet/policer/xlate.c @@ -0,0 +1,1380 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <string.h> +#include <stddef.h> +#include <stdio.h> +#include <assert.h> +#include <math.h> + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#include <vnet/policer/xlate.h> +#include <vnet/policer/police.h> + +#define INTERNAL_SS 1 + +/* debugs */ +#define SSE2_QOS_DEBUG_ERROR(msg, args...) \ + fformat(stderr, msg "\n", ##args); + +#define SSE2_QOS_DEBUG_INFO(msg, args...) \ + fformat(stderr, msg "\n", ##args); + + +#define SSE2_QOS_TR_ERR(TpParms...) +// { +// } + +#define SSE2_QOS_TR_INFO(TpParms...) 
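For orientation, a minimal sketch of how a forwarding node might use the declarations in policer.h together with vnet_police_packet(). The clock conversion (e.g. clib_cpu_time_now () >> POLICER_TICKS_PER_PERIOD_SHIFT) and the color-blind POLICE_CONFORM input are assumptions for illustration, not requirements stated by this patch.

/* Sketch only: choose the next node for one packet on an interface that
 * already has a policer attached (policer_index_by_sw_if_index is valid). */
static inline u32
policer_select_next (vnet_policer_main_t * pm, u32 sw_if_index,
                     u32 packet_length, u64 time_in_policer_periods)
{
  u32 pi = pm->policer_index_by_sw_if_index[sw_if_index];
  policer_read_response_type_st * pol = pool_elt_at_index (pm->policers, pi);

  /* Color-blind policing: every packet is presented as green. */
  policer_result_e col = vnet_police_packet (pol, packet_length,
                                             POLICE_CONFORM,
                                             time_in_policer_periods);

  return (col == POLICE_CONFORM) ? VNET_POLICER_NEXT_TRANSMIT
                                 : VNET_POLICER_NEXT_DROP;
}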
+ +#ifndef MIN +#define MIN(x,y) (((x)<(y))?(x):(y)) +#endif + +#ifndef MAX +#define MAX(x,y) (((x)>(y))?(x):(y)) +#endif + +#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_OFFSET 0 +#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_MASK 8 +#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_SHIFT 24 + +#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_OFFSET 2 +#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_MASK 2 +#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_SHIFT 10 + +#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_OFFSET 3 +#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_MASK 2 +#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_SHIFT 0 + +#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_OFFSET 4 +#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_MASK 32 +#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_SHIFT 0 + +#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_OFFSET 8 +#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_MASK 2 +#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_SHIFT 30 + +#define IPE_POLICER_FULL_WRITE_REQUEST_AN_OFFSET 8 +#define IPE_POLICER_FULL_WRITE_REQUEST_AN_MASK 1 +#define IPE_POLICER_FULL_WRITE_REQUEST_AN_SHIFT 29 + +#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_OFFSET 8 +#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_MASK 4 +#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_SHIFT 22 + +#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_OFFSET 9 +#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_MASK 11 +#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_SHIFT 11 + +#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_OFFSET 10 +#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_MASK 11 +#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_SHIFT 0 + +#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_OFFSET 12 +#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_MASK 5 +#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_SHIFT 27 + +#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_OFFSET 12 +#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_MASK 7 +#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_SHIFT 20 + +#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_OFFSET 13 +#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_MASK 5 +#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_SHIFT 15 + +#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_OFFSET 14 +#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_MASK 7 +#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_SHIFT 8 + +#define IPE_POLICER_FULL_WRITE_REQUEST_CB_OFFSET 16 +#define IPE_POLICER_FULL_WRITE_REQUEST_CB_MASK 31 +#define IPE_POLICER_FULL_WRITE_REQUEST_CB_SHIFT 0 + +#define IPE_POLICER_FULL_WRITE_REQUEST_EB_OFFSET 20 +#define IPE_POLICER_FULL_WRITE_REQUEST_EB_MASK 31 +#define IPE_POLICER_FULL_WRITE_REQUEST_EB_SHIFT 0 + +#define IPE_RFC_RFC2697 0x00000000 +#define IPE_RFC_RFC2698 0x00000001 +#define IPE_RFC_RFC4115 0x00000002 +#define IPE_RFC_MEF5CF1 0x00000003 + +/* End of constants copied from sse_ipe_desc_fmt.h */ + +/* Misc Policer specific definitions */ +#define SSE2_QOS_POLICER_FIXED_PKT_SIZE 256 + +// TODO check what can be provided by hw macro based on ASIC +#define SSE2_QOS_POL_TICKS_PER_SEC 1000LL /* 1 tick = 1 ms */ + +/* + * Default burst, in ms (byte format) + */ +#define SSE2_QOS_POL_DEF_BURST_BYTE 100 + +/* + * Minimum burst needs to be such that the largest packet size is accomodated + */ +// Do we need to get it from some lib? 
+#define SSE2_QOS_POL_MIN_BURST_BYTE 9*1024 + + +/* + * Flag to indicate if AN is employed or not + * 1 - TRUE, 0 - FALSE + */ +#define SSE2_QOS_POL_ALLOW_NEGATIVE 1 + +// Various Macros to take care of policer calculations + +#define SSE2_QOS_POL_COMM_BKT_MAX \ + (1<<IPE_POLICER_FULL_WRITE_REQUEST_CB_MASK) +#define SSE2_QOS_POL_EXTD_BKT_MAX \ + (1<<IPE_POLICER_FULL_WRITE_REQUEST_EB_MASK) + +#define SSE2_QOS_POL_RATE_EXP_SIZE \ + (IPE_POLICER_FULL_WRITE_REQUEST_REXP_MASK) +#define SSE2_QOS_POL_RATE_EXP_MAX ((1<<SSE2_QOS_POL_RATE_EXP_SIZE) - 1) +#define SSE2_QOS_POL_AVG_RATE_MANT_SIZE \ + (IPE_POLICER_FULL_WRITE_REQUEST_ARM_MASK) +#define SSE2_QOS_POL_AVG_RATE_MANT_MAX \ + ((1<< SSE2_QOS_POL_AVG_RATE_MANT_SIZE) - 1) +#define SSE2_QOS_POL_AVG_RATE_MAX \ + (SSE2_QOS_POL_AVG_RATE_MANT_MAX << \ + SSE2_QOS_POL_RATE_EXP_MAX) + +#define SSE2_QOS_POL_PEAK_RATE_MANT_SIZE \ + (IPE_POLICER_FULL_WRITE_REQUEST_PRM_MASK) +#define SSE2_QOS_POL_PEAK_RATE_MANT_MAX \ + ((1<<SSE2_QOS_POL_PEAK_RATE_MANT_SIZE) - 1) +#define SSE2_QOS_POL_PEAK_RATE_MAX \ + (SSE2_QOS_POL_PEAK_RATE_MANT_MAX << \ + SSE2_QOS_POL_RATE_EXP_MAX) + +#define SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_SIZE \ + (IPE_POLICER_FULL_WRITE_REQUEST_CBLM_MASK) +#define SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_MAX \ + ((1<<SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_SIZE) - 1) +#define SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_SIZE \ + (IPE_POLICER_FULL_WRITE_REQUEST_CBLE_MASK) +#define SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_MAX \ + ((1<<SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_SIZE) - 1) +#define SSE2_QOS_POL_COMM_BKT_LIMIT_MAX \ + ((uint64_t)SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_MAX << \ + (uint64_t)SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_MAX) + +#define SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_SIZE \ + (IPE_POLICER_FULL_WRITE_REQUEST_EBLM_MASK) +#define SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_MAX \ + ((1<<SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_SIZE) - 1) +#define SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_SIZE \ + (IPE_POLICER_FULL_WRITE_REQUEST_EBLE_MASK) +#define SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX \ + ((1<<SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_SIZE) - 1) +#define SSE2_QOS_POL_EXT_BKT_LIMIT_MAX \ + ((uint64_t)SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_MAX << \ + (uint64_t)SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX) + +/* + * Rates determine the units of the bucket + * 256.114688 Gbps < Rate 8 byte units + * 128.057344 Gbps < Rate <= 256.114688 Gbps 4 byte units + * 64.028672 Gbps < Rate <= 128.057344 Gbps 2 byte units + * Rate <= 64.028672 Gbps 1 byte units + * + * The code uses bytes per tick as oppose to Gigabits per second. 
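(In bytes per 1 ms tick, the thresholds above work out to RATE64 = 8,003,584, RATE128 = 16,007,168 and RATE256 = 32,014,336. As an illustration, a 10 Gbps rate is 1,250,000 bytes/tick and therefore uses 1-byte bucket units, while a 100 Gbps rate is 12,500,000 bytes/tick and falls in the 2-byte-unit band.)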
+ */ +#define RATE256 (256114688000LL / 8LL / SSE2_QOS_POL_TICKS_PER_SEC) +#define RATE128 (128057344000LL / 8LL / SSE2_QOS_POL_TICKS_PER_SEC) +#define RATE64 ( 64028672000LL / 8LL / SSE2_QOS_POL_TICKS_PER_SEC) + +#define RATE_OVER256_UNIT 8LL +#define RATE_128TO256_UNIT 4LL +#define RATE_64TO128_UNIT 2LL + +static cerrno +sse2_qos_pol_round (uint64_t numerator, + uint64_t denominator, + uint64_t *rounded_value, + sse2_qos_round_type_en round_type) +{ + cerrno rc = EOK; + + if (denominator == 0) { + SSE2_QOS_DEBUG_ERROR("Illegal denominator"); + SSE2_QOS_TR_ERR(SSE2_QOSRM_TP_ERR_59); + return(EINVAL); + } + + switch (round_type) { + case SSE2_QOS_ROUND_TO_CLOSEST: + *rounded_value = ((numerator + (denominator >> 1)) / denominator); + break; + + case SSE2_QOS_ROUND_TO_UP: + *rounded_value = (numerator / denominator); + if ((*rounded_value * denominator) < numerator) { + *rounded_value += 1; + } + break; + + case SSE2_QOS_ROUND_TO_DOWN: + *rounded_value = (numerator / denominator); + break; + + case SSE2_QOS_ROUND_INVALID: + default: + SSE2_QOS_DEBUG_ERROR("Illegal round type"); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_60, round_type); + rc = EINVAL; + break; + } + return(rc); +} + + +static cerrno +sse2_pol_validate_cfg_params (sse2_qos_pol_cfg_params_st *cfg) +{ + uint64_t numer, denom, rnd_value; + uint32_t cir_hw, eir_hw; + cerrno rc = EOK; + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) && + (cfg->rb.kbps.eir_kbps < cfg->rb.kbps.cir_kbps)) { + SSE2_QOS_DEBUG_ERROR("CIR (%u kbps) is greater than PIR (%u kbps)", + cfg->rb.kbps.cir_kbps, cfg->rb.kbps.eir_kbps); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_39, cfg->rb.kbps.cir_kbps, + cfg->rb.kbps.eir_kbps); + return(EINVAL); + } + + /* + * convert rates to bytes-per-tick + */ + numer = (uint64_t)(cfg->rb.kbps.cir_kbps); + denom = (uint64_t)(8 * SSE2_QOS_POL_TICKS_PER_SEC) / 1000; + rc = sse2_qos_pol_round(numer, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Unable to convert CIR to bytes/tick format"); + // Error traced + return(rc); + } + cir_hw = (uint32_t)rnd_value; + + numer = (uint64_t)(cfg->rb.kbps.eir_kbps); + rc = sse2_qos_pol_round(numer, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Unable to convert EIR to bytes/tick format"); + // Error traced + return(rc); + } + eir_hw = (uint32_t)rnd_value; + + if (cir_hw > SSE2_QOS_POL_AVG_RATE_MAX) { + SSE2_QOS_DEBUG_ERROR("hw cir (%u bytes/tick) is greater than the " + "max supported value (%u)", cir_hw, + SSE2_QOS_POL_AVG_RATE_MAX); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_84, cir_hw, + SSE2_QOS_POL_AVG_RATE_MAX); + return(EINVAL); + } + + if (eir_hw > SSE2_QOS_POL_PEAK_RATE_MAX) { + SSE2_QOS_DEBUG_ERROR("hw eir (%u bytes/tick) is greater than the " + "max supported value (%u). Capping it to the max. 
" + "supported value", eir_hw, SSE2_QOS_POL_PEAK_RATE_MAX); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_85, + eir_hw, SSE2_QOS_POL_PEAK_RATE_MAX); + return(EINVAL); + } + /* + * CIR = 0, with bc != 0 is not allowed + */ + if ((cfg->rb.kbps.cir_kbps == 0) && cfg->rb.kbps.cb_bytes) { + SSE2_QOS_DEBUG_ERROR("CIR = 0 with bc != 0"); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_55); + return(EINVAL); + } + + if ((cfg->rb.kbps.eir_kbps == 0) && + (cfg->rfc > SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)) { + SSE2_QOS_DEBUG_ERROR("EIR = 0 for a 2R3C policer (rfc: %u)", cfg->rfc); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_23, cfg->rb.kbps.eir_kbps, cfg->rfc); + return(EINVAL); + } + + if (cfg->rb.kbps.eir_kbps && + (cfg->rfc < SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698)) { + SSE2_QOS_DEBUG_ERROR("EIR: %u kbps for a 1-rate policer (rfc: %u)", + cfg->rb.kbps.eir_kbps, cfg->rfc); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_23, cfg->rb.kbps.eir_kbps, cfg->rfc); + return(EINVAL); + } + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) && cfg->rb.kbps.eb_bytes) { + SSE2_QOS_DEBUG_ERROR("For a 1R1B policer, EB burst cannot be > 0"); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_56); + return(EINVAL); + } + + return(EOK); +} + +static void +sse2_qos_convert_value_to_exp_mant_fmt (uint64_t value, + uint16_t max_exp_value, + uint16_t max_mant_value, + sse2_qos_round_type_en type, + uint8_t *exp, + uint32_t *mant) +{ + uint64_t rnd_value; + uint64_t temp_mant; + uint8_t temp_exp; + + /* + * Select the lowest possible exp, and the largest possible mant + */ + temp_exp = 0; + temp_mant = value; + while (temp_exp <= max_exp_value) { + if (temp_mant <= max_mant_value) { + break; + } + + temp_exp++; + rnd_value = 0; + (void)sse2_qos_pol_round((uint64_t)value, (uint64_t)(1 << temp_exp), + &rnd_value, type); + temp_mant = rnd_value; + } + + if (temp_exp > max_exp_value) { + /* + * CAP mant to its max value, and decrement exp + */ + temp_exp--; + temp_mant = max_mant_value; + } + + *exp = temp_exp; + *mant = (uint32_t)temp_mant; + + SSE2_QOS_DEBUG_INFO("value: 0x%llx, mant: %u, exp: %u", value, *mant, *exp); + return; +} + +static cerrno +sse2_pol_convert_cfg_rates_to_hw (sse2_qos_pol_cfg_params_st *cfg, + sse2_qos_pol_hw_params_st *hw) +{ + cerrno rc = EOK; + uint32_t cir_hw, eir_hw, hi_mant, hi_rate, cir_rnded, eir_rnded, eir_kbps; + uint64_t numer, denom, rnd_value; + uint8_t exp; + + /* + * convert rates to bytes-per-tick (tick is 1ms) + * For rate conversion, the denominator is gonna be the same + */ + denom = (uint64_t)((SSE2_QOS_POL_TICKS_PER_SEC * 8) / 1000); + numer = (uint64_t)(cfg->rb.kbps.cir_kbps); + rc = sse2_qos_pol_round(numer, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Rounding error, rate: %d kbps, rounding_type: %d", + cfg->rb.kbps.cir_kbps, cfg->rnd_type); + // Error is traced + return(rc); + } + cir_hw = (uint32_t)rnd_value; + + if (cfg->rb.kbps.cir_kbps && (cir_hw == 0)) { + /* + * After rounding, cir_hw = 0. 
Bump it up + */ + cir_hw = 1; + } + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) { + eir_kbps = 0; + } else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) { + eir_kbps = cfg->rb.kbps.cir_kbps; + } else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) { + eir_kbps = cfg->rb.kbps.eir_kbps - cfg->rb.kbps.cir_kbps; + } else { + eir_kbps = cfg->rb.kbps.eir_kbps; + } + + numer = (uint64_t)eir_kbps; + rc = sse2_qos_pol_round(numer, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Rounding error, rate: %d kbps, rounding_type: %d", + eir_kbps, cfg->rnd_type); + // Error is traced + return(rc); + } + eir_hw = (uint32_t)rnd_value; + + if (eir_kbps && (eir_hw == 0)) { + /* + * After rounding, eir_hw = 0. Bump it up + */ + eir_hw = 1; + } + + SSE2_QOS_DEBUG_INFO("cir_hw: %u bytes/tick, eir_hw: %u bytes/tick", cir_hw, + eir_hw); + + if (cir_hw > eir_hw) { + hi_rate = cir_hw; + } else { + hi_rate = eir_hw; + } + + if ((cir_hw == 0) && (eir_hw == 0)) { + /* + * Both the rates are 0. Use exp = 15, and set the RFC to 4115. Also + * set AN = 0 + */ + exp = (uint8_t)SSE2_QOS_POL_RATE_EXP_MAX; + hi_mant = 0; + hw->rfc = IPE_RFC_RFC4115; + hw->allow_negative = 0; + } else { + sse2_qos_convert_value_to_exp_mant_fmt(hi_rate, + (uint16_t)SSE2_QOS_POL_RATE_EXP_MAX, + (uint16_t)SSE2_QOS_POL_AVG_RATE_MANT_MAX, + (sse2_qos_round_type_en) cfg->rnd_type, + &exp, &hi_mant); + } + + denom = (1 << exp); + if (hi_rate == eir_hw) { + hw->peak_rate_man = (uint16_t)hi_mant; + rc = sse2_qos_pol_round((uint64_t)cir_hw, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + hw->avg_rate_man = (uint16_t)rnd_value; + } else { + hw->avg_rate_man = (uint16_t)hi_mant; + rc = sse2_qos_pol_round((uint64_t)eir_hw, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + hw->peak_rate_man = (uint16_t)rnd_value; + } + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Rounding error"); + // Error is traced + return(rc); + } + hw->rate_exp = exp; + + if ((hw->avg_rate_man == 0) && (cfg->rb.kbps.cir_kbps)) { + /* + * cir was reduced to 0 during rounding. Bump it up + */ + hw->avg_rate_man = 1; + SSE2_QOS_DEBUG_INFO("CIR = 0 during rounding. Bump it up to %u " + "bytes/tick", (hw->avg_rate_man << hw->rate_exp)); + } + + if ((hw->peak_rate_man == 0) && eir_kbps) { + /* + * eir was reduced to 0 during rounding. Bump it up + */ + hw->peak_rate_man = 1; + SSE2_QOS_DEBUG_INFO("EIR = 0 during rounding. Bump it up to %u " + "bytes/tick", (hw->peak_rate_man << hw->rate_exp)); + } + + cir_rnded = (hw->avg_rate_man << hw->rate_exp); + eir_rnded = (hw->peak_rate_man << hw->rate_exp); + + SSE2_QOS_DEBUG_INFO("Configured(rounded) values, cir: %u " + "kbps (mant: %u, exp: %u, rate: %u bytes/tick)", + cfg->rb.kbps.cir_kbps, hw->avg_rate_man, + hw->rate_exp, cir_rnded); + + SSE2_QOS_DEBUG_INFO("Configured(rounded) values, eir: %u " + "kbps (mant: %u, exp: %u, rate: %u bytes/tick)", + cfg->rb.kbps.eir_kbps, hw->peak_rate_man, + hw->rate_exp, eir_rnded); + + return(rc); +} + +/***** + * NAME + * sse2_pol_get_bkt_max + * + * PARAMETERS + * rate_hw - either the averate rate or peak rate + * bkt_max - bit width in the current bucket or extended bucket + * + * RETURNS + * uint64_t - maximum token bytes for the current or extended bucket + * + * DESCRIPTION + * The current bucket or extended bucket fields are in units of either + * 1,2,4,8 bytes based on the average or peak rate respective to current + * or extended bucket. 
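(Worked example using the constants above: the committed-bucket field is 31 bits wide, so at a rate at or below RATE64 it can hold up to 2^31 - 1 bytes, whereas at a rate above RATE256, where the units are 8 bytes, it can represent up to 2^31 * 8 - 8 bytes; sse2_pol_get_bkt_value below performs the inverse division when a byte count is programmed into the field.)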
+ * + * To get the actual maximum number of bytes that can be stored in the + * field, the value must be multiplied by the units of either 1,2,4,8 + * bytes based on the rate. + *****/ +uint64_t +sse2_pol_get_bkt_max (uint64_t rate_hw, uint64_t bkt_max) +{ + if (rate_hw <= RATE64) { + return (bkt_max - 1); + } else if (rate_hw <= RATE128) { + return ((bkt_max * RATE_64TO128_UNIT) - RATE_64TO128_UNIT); + } else if (rate_hw <= RATE256) { + return ((bkt_max * RATE_128TO256_UNIT) - RATE_128TO256_UNIT); + } + /* rate must be over 256 */ + return ((bkt_max * RATE_OVER256_UNIT) - RATE_OVER256_UNIT); +} + +/***** + * NAME + * sse2_pol_get_bkt_value + * + * PARAMETERS + * rate_hw - either the averate rate or peak rate + * byte_value - bytes for this token bucket + * + * RETURNS + * uint64_t - unit value for the current or extended bucket field + * + * DESCRIPTION + * The current bucket or extended bucket fields are in units of either + * 1,2,4,8 bytes based on the average or peak rate respective to current + * or extended bucket. + * + * To get the units that can be stored in the field, the byte value must + * be divided by the units of either 1,2,4,8 bytes based on the rate. + *****/ +uint64_t +sse2_pol_get_bkt_value (uint64_t rate_hw, uint64_t byte_value) +{ + if (rate_hw <= RATE64) { + return (byte_value); + } else if (rate_hw <= RATE128) { + return (byte_value / RATE_64TO128_UNIT); + } else if (rate_hw <= RATE256) { + return (byte_value / RATE_128TO256_UNIT); + } + /* rate must be over 256 */ + return (byte_value / RATE_OVER256_UNIT); +} + +static void +sse2_pol_rnd_burst_byte_fmt (uint64_t cfg_burst, + uint16_t max_exp_value, + uint16_t max_mant_value, + uint32_t max_bkt_value, + uint32_t rate_hw, + uint8_t *exp, + uint32_t *mant, + uint32_t *bkt_value) +{ + uint64_t bkt_max=max_bkt_value; + uint64_t bkt_limit_max; + uint64_t rnd_burst; + uint64_t temp_bkt_value; + + bkt_limit_max = ((uint64_t)max_mant_value<<(uint64_t)max_exp_value); + bkt_max = sse2_pol_get_bkt_max(rate_hw, bkt_max); + bkt_max=MIN(bkt_max, bkt_limit_max); + if (!cfg_burst) { + /* + * If configured burst = 0, compute the burst to be 100ms at a given + * rate. Note that for rate_hw = 0, exp = mant = 0. + */ + cfg_burst = (uint64_t)rate_hw * (uint64_t)SSE2_QOS_POL_DEF_BURST_BYTE; + } + + if (cfg_burst > bkt_max) { + SSE2_QOS_DEBUG_ERROR("burst 0x%llx bytes is greater than the max. " + "supported value 0x%llx bytes. Capping it to the " + "max", cfg_burst, bkt_max); + SSE2_QOS_TR_INFO(SSE2_QOS_TP_INFO_38, + (uint)cfg_burst, (uint)bkt_max); + cfg_burst = bkt_max; + } + + if (cfg_burst < SSE2_QOS_POL_MIN_BURST_BYTE) { + /* + * Bump up the burst value ONLY if the cfg_burst is non-zero AND + * less than the min. supported value + */ + SSE2_QOS_DEBUG_INFO("burst 0x%llx bytes is less than the min " + "supported value %u bytes. Rounding it up to " + "the min", cfg_burst, SSE2_QOS_POL_MIN_BURST_BYTE); + SSE2_QOS_TR_INFO(SSE2_QOS_TP_INFO_39, (uint)cfg_burst, + SSE2_QOS_POL_MIN_BURST_BYTE); + cfg_burst = SSE2_QOS_POL_MIN_BURST_BYTE; + } + + sse2_qos_convert_value_to_exp_mant_fmt(cfg_burst, + max_exp_value, + max_mant_value, + SSE2_QOS_ROUND_TO_DOWN, + exp, + mant); + + /* Bucket value is based on rate. 
*/ + rnd_burst = ((uint64_t)(*mant) << (uint64_t)(*exp)); + temp_bkt_value = sse2_pol_get_bkt_value(rate_hw, rnd_burst); + *bkt_value = (uint32_t)temp_bkt_value; +} + +static cerrno +sse2_pol_convert_cfg_burst_to_hw (sse2_qos_pol_cfg_params_st *cfg, + sse2_qos_pol_hw_params_st *hw) +{ + uint8_t temp_exp; + uint32_t temp_mant, rate_hw; + uint64_t eb_bytes; + uint32_t bkt_value; + + /* + * compute Committed Burst + */ + SSE2_QOS_DEBUG_INFO("Compute commit burst ..."); + rate_hw = (hw->avg_rate_man) << (hw->rate_exp); + sse2_pol_rnd_burst_byte_fmt(cfg->rb.kbps.cb_bytes, + (uint16_t)SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_MAX, + (uint16_t)SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_MAX, + (uint32_t)SSE2_QOS_POL_COMM_BKT_MAX, + rate_hw, &temp_exp, &temp_mant, &bkt_value); + SSE2_QOS_DEBUG_INFO("Committed burst, burst_limit: 0x%llx mant : %u, " + "exp: %u, rnded: 0x%llx cb:%u bytes", + cfg->rb.kbps.cb_bytes, temp_mant, temp_exp, + ((uint64_t)temp_mant << (uint64_t)temp_exp), bkt_value); + + hw->comm_bkt_limit_exp = temp_exp; + hw->comm_bkt_limit_man = (uint8_t)temp_mant; + hw->comm_bkt = bkt_value; + + /* + * compute Exceed Burst + */ + SSE2_QOS_DEBUG_INFO("Compute exceed burst ..."); + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) { + /* + * For 1R2C, hw uses 2R3C (RFC-4115). As such, the Exceed Bucket + * params are set to 0. Recommendation is to use EB_exp = max_exp (=15) + * and EB_mant = 0 + */ + hw->extd_bkt_limit_exp = (uint8_t)SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX; + hw->extd_bkt_limit_man = 0; + SSE2_QOS_DEBUG_INFO("Excess burst, burst: 0x%llx mant: %u, " + "exp: %u, rnded: 0x%llx bytes", + cfg->rb.kbps.eb_bytes, hw->extd_bkt_limit_man, + hw->extd_bkt_limit_exp, + ((uint64_t)hw->extd_bkt_limit_man << + (uint64_t)hw->extd_bkt_limit_exp)); + SSE2_QOS_TR_INFO(SSE2_QOS_TP_INFO_20, (uint)cfg->rb.kbps.eb_bytes, + hw->extd_bkt_limit_man, hw->extd_bkt_limit_exp); + return(EOK); + } + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) { + eb_bytes = cfg->rb.kbps.cb_bytes + cfg->rb.kbps.eb_bytes; + } else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) { + eb_bytes = cfg->rb.kbps.eb_bytes - cfg->rb.kbps.cb_bytes; + } else { + eb_bytes = cfg->rb.kbps.eb_bytes; + } + + rate_hw = (hw->peak_rate_man) << (hw->rate_exp); + sse2_pol_rnd_burst_byte_fmt(eb_bytes, + (uint16_t)SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX, + (uint16_t)SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_MAX, + (uint32_t)SSE2_QOS_POL_EXTD_BKT_MAX, + rate_hw, &temp_exp, &temp_mant, &bkt_value); + + SSE2_QOS_DEBUG_INFO("Excess burst, burst_limit: 0x%llx mant: %u, " + "exp: %u, rnded: 0x%llx eb:%u bytes", + cfg->rb.kbps.eb_bytes, temp_mant, temp_exp, + ((uint64_t)temp_mant << (uint64_t)temp_exp), bkt_value); + + hw->extd_bkt_limit_exp = (uint8_t)temp_exp; + hw->extd_bkt_limit_man = (uint8_t)temp_mant; + hw->extd_bkt = bkt_value; + + return(EOK); +} + + +/* + * Input: configured parameter values in 'cfg'. + * Output: h/w programmable parameter values in 'hw'. + * Return: success or failure code. 
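(Both the rate and burst conversions lean on sse2_qos_convert_value_to_exp_mant_fmt above. A worked example with illustrative numbers, assuming round-to-closest: to encode 5,000,000 bytes/tick into the 11-bit average-rate mantissa (max 2047), the search finds that 5,000,000 / 2^11 rounds to 2441, still too large, while 5,000,000 / 2^12 rounds to 1221; so mant = 1221, exp = 12, and the programmed rate is 1221 << 12 = 5,001,216 bytes/tick, about 0.02% above the request.)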
+ */ +static cerrno +sse2_pol_convert_cfg_to_hw_params (sse2_qos_pol_cfg_params_st *cfg, + sse2_qos_pol_hw_params_st *hw) +{ + cerrno rc = EOK; + + /* + * clear the hw_params + */ + memset(hw, 0, sizeof(sse2_qos_pol_hw_params_st)); + + hw->allow_negative = SSE2_QOS_POL_ALLOW_NEGATIVE; + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) || + (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)) { + hw->rfc = IPE_RFC_RFC4115; + } else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) { + hw->rfc = IPE_RFC_RFC2697; + } else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) { + hw->rfc = IPE_RFC_RFC2698; + } else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1) { + hw->rfc = IPE_RFC_MEF5CF1; + } else { + SSE2_QOS_DEBUG_ERROR("Invalid RFC type %d\n", cfg->rfc); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_61, cfg->rfc); + return(EINVAL); + } + + rc = sse2_pol_convert_cfg_rates_to_hw(cfg, hw); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Unable to convert config rates to hw. Error: %d", + rc); + // Error is traced + return(rc); + } + + rc = sse2_pol_convert_cfg_burst_to_hw(cfg, hw); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Unable to convert config burst to hw. Error: %d", + rc); + // Error is traced + return(rc); + } + + return OK_pushHW; +} + + +uint32_t +sse2_qos_convert_pps_to_kbps (uint32_t rate_pps) +{ + // sse2_qos_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT, + // SSE2_QOS_SHIP_CNT_POL_CONV_PPS_TO_KBPS); + + uint64_t numer, rnd_value = 0; + + numer = (uint64_t)((uint64_t)rate_pps * + (uint64_t)SSE2_QOS_POLICER_FIXED_PKT_SIZE * 8LL); + (void)sse2_qos_pol_round(numer, 1000LL, &rnd_value, + SSE2_QOS_ROUND_TO_CLOSEST); + + return ((uint32_t)rnd_value); +} + +uint32_t +sse2_qos_convert_burst_ms_to_bytes (uint32_t burst_ms, + uint32_t rate_kbps) +{ + uint64_t numer, rnd_value = 0; + + //sse2_qos_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT, + // SSE2_QOS_SHIP_CNT_POL_CONV_BURST_MS_TO_BYTES); + + numer = (uint64_t)((uint64_t)burst_ms * (uint64_t)rate_kbps); + + (void)sse2_qos_pol_round(numer, 8LL, &rnd_value, + SSE2_QOS_ROUND_TO_CLOSEST); + + return ((uint32_t)rnd_value); +} + + +/* + * Input: configured parameters in 'cfg'. + * Output: h/w parameters are returned in 'hw', + * Return: Status, success or failure code. + */ +trans_layer_rc +sse2_pol_compute_hw_params (sse2_qos_pol_cfg_params_st *cfg, + sse2_qos_pol_hw_params_st *hw) +{ + cerrno rc = EOK; + + if (!cfg || !hw) { + SSE2_QOS_DEBUG_ERROR("Illegal parameters"); + return(Not_OK); + } + + /* + * Validate the police config params being presented to RM + */ + rc = sse2_pol_validate_cfg_params(cfg); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Config parameter validation failed. Error: %d", + rc); + // Error is traced + return(Not_OK); + } + + /* + * first round configured values to h/w supported values. This func + * also determines whether 'tick' or 'byte' format + */ + rc = sse2_pol_convert_cfg_to_hw_params(cfg, hw); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Unable to convert config params to hw params. " + "Error: %d", rc); + SSE2_QOS_TR_ERR(SSE2_QOS_TP_ERR_53, rc); + return(Not_OK); + } + + return OK_pushHW; +} + + +#if defined (INTERNAL_SS) || defined (X86) + +// For initializing the x86 policer format + +/* + * Return the number of hardware TSC timer ticks per second for the dataplane. + * This is approximately, but not exactly, the clock speed. 
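(A rough worked example for the conversion routine below, with assumed numbers: at hz = 2 GHz there are 2e9 / 2^17, about 15,259, policer periods per second, roughly 65.5 usec each, so a 1 Gbps CIR of 125,000,000 bytes/sec becomes about 8,192 bytes per period; with burst limits around 125,000 bytes, the leading-zero count of the largest scaled quantity yields a scale shift of 8, giving cir_tokens_per_period of roughly 2,097,152 and a scaled limit of 32,000,000, both well within 32 bits and below the 2^22 tokens-per-period bound noted in the code.)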
+ */ +static uint64_t get_tsc_hz(void) +{ + f64 cpu_freq; + + cpu_freq = os_cpu_clock_frequency(); + return (uint64_t) cpu_freq; +} + +/* + * Convert rates into bytes_per_period and scale. + * Return 0 if ok or 1 if error. + */ +static int +compute_policer_params (uint64_t hz, // CPU speed in clocks per second + uint64_t cir_rate, // in bytes per second + uint64_t pir_rate, // in bytes per second + uint32_t *current_limit, // in bytes, output may scale the input + uint32_t *extended_limit, // in bytes, output may scale the input + uint32_t *cir_bytes_per_period, + uint32_t *pir_bytes_per_period, + uint32_t *scale) +{ + double period; + double internal_cir_bytes_per_period; + double internal_pir_bytes_per_period; + uint32_t max; + uint32_t scale_shift; + uint32_t scale_amount; + uint32_t orig_current_limit = *current_limit; + + // Compute period. For 1Ghz-to-8Ghz CPUs, the period will be in + // the range of 16 to 116 usec. + period = ((double) hz) / ((double) POLICER_TICKS_PER_PERIOD); + + // Determine bytes per period for each rate + internal_cir_bytes_per_period = (double)cir_rate / period; + internal_pir_bytes_per_period = (double)pir_rate / period; + + // Scale if possible. Scaling helps rate accuracy, but is contrained + // by the scaled rates and limits fitting in 32-bits. + // In addition, we need to insure the scaled rate is no larger than + // 2^22 tokens per period. This allows the dataplane to ignore overflow + // in the tokens-per-period multiplication since it could only + // happen if the policer were idle for more than a year. + // This is not really a constraint because 100Gbps at 1Ghz is only + // 1.6M tokens per period. +#define MAX_RATE_SHIFT 10 + max = MAX(*current_limit, *extended_limit); + max = MAX(max, (uint32_t)internal_cir_bytes_per_period << MAX_RATE_SHIFT); + max = MAX(max, (uint32_t)internal_pir_bytes_per_period << MAX_RATE_SHIFT); + scale_shift = __builtin_clz(max); + + scale_amount = 1 << scale_shift; + *scale = scale_shift; + + // Scale the limits + *current_limit = *current_limit << scale_shift; + *extended_limit = *extended_limit << scale_shift; + + // Scale the rates + internal_cir_bytes_per_period = internal_cir_bytes_per_period * ((double)scale_amount); + internal_pir_bytes_per_period = internal_pir_bytes_per_period * ((double)scale_amount); + + // Make sure the new rates are reasonable + // Only needed for very low rates with large bursts + if (internal_cir_bytes_per_period < 1.0) { + internal_cir_bytes_per_period = 1.0; + } + if (internal_pir_bytes_per_period < 1.0) { + internal_pir_bytes_per_period = 1.0; + } + + *cir_bytes_per_period = (uint32_t)internal_cir_bytes_per_period; + *pir_bytes_per_period = (uint32_t)internal_pir_bytes_per_period; + +// #define PRINT_X86_POLICE_PARAMS +#ifdef PRINT_X86_POLICE_PARAMS + { + uint64_t effective_BPS; + + // This value actually slightly conservative because it doesn't take into account + // the partial period at the end of a second. This really matters only for very low + // rates. 
+ effective_BPS = (((uint64_t) (*cir_bytes_per_period * (uint64_t)period)) >> *scale ); + + printf("hz=%llu, cir_rate=%llu, limit=%u => " + "periods-per-sec=%d usec-per-period=%d => " + "scale=%d cir_BPP=%u, scaled_limit=%u => " + "effective BPS=%llu, accuracy=%f\n", + // input values + (unsigned long long)hz, + (unsigned long long)cir_rate, + orig_current_limit, + // computed values + (uint32_t)(period), // periods per second + (uint32_t)(1000.0 * 1000.0 / period), // in usec + *scale, + *cir_bytes_per_period, + *current_limit, + // accuracy + (unsigned long long)effective_BPS, + (double)cir_rate / (double)effective_BPS); + } +#else + orig_current_limit = orig_current_limit; // Make compiler happy +#endif + + return 0; // ok +} + + +/* + * Input: configured parameters in 'cfg'. + * Output: h/w parameters are returned in 'hw', + * Return: Status, success or failure code. + */ +trans_layer_rc +x86_pol_compute_hw_params (sse2_qos_pol_cfg_params_st *cfg, + policer_read_response_type_st *hw) +{ + const int BYTES_PER_KBIT = (1000 / 8); + uint64_t hz; + uint32_t cap; + + if (!cfg || !hw) { + SSE2_QOS_DEBUG_ERROR("Illegal parameters"); + return(Not_OK); + } + + hz = get_tsc_hz(); + hw->last_update_time = 0; + + // Cap the bursts to 32-bits. This allows up to almost one second of + // burst on a 40GE interface, which should be fine for x86. + cap = (cfg->rb.kbps.cb_bytes > 0xFFFFFFFF) ? 0xFFFFFFFF : cfg->rb.kbps.cb_bytes; + hw->current_limit = cap; + cap = (cfg->rb.kbps.eb_bytes > 0xFFFFFFFF) ? 0xFFFFFFFF : cfg->rb.kbps.eb_bytes; + hw->extended_limit = cap; + + if ((cfg->rb.kbps.cir_kbps == 0) && (cfg->rb.kbps.cb_bytes == 0) && (cfg->rb.kbps.eb_bytes == 0)) { + // This is a uninitialized, always-violate policer + hw->single_rate = 1; + hw->cir_tokens_per_period = 0; + return OK_pushHW; + } + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) || + (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)) { + // Single-rate policer + + hw->single_rate = 1; + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) && cfg->rb.kbps.eb_bytes) { + SSE2_QOS_DEBUG_ERROR("Policer parameter validation failed -- 1R2C."); + return(Not_OK); + } + + if ((cfg->rb.kbps.cir_kbps == 0) || + (cfg->rb.kbps.eir_kbps != 0) || + ((cfg->rb.kbps.cb_bytes == 0) && (cfg->rb.kbps.eb_bytes == 0))) { + SSE2_QOS_DEBUG_ERROR("Policer parameter validation failed -- 1R."); + return(Not_OK); + } + + if (compute_policer_params(hz, + (uint64_t)cfg->rb.kbps.cir_kbps * BYTES_PER_KBIT, + 0, + &hw->current_limit, + &hw->extended_limit, + &hw->cir_tokens_per_period, + &hw->pir_tokens_per_period, + &hw->scale)) { + SSE2_QOS_DEBUG_ERROR("Policer parameter computation failed."); + return(Not_OK); + } + + } else if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) || + (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)) { + // Two-rate policer + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) { + hw->color_aware = 1; + } + + if ((cfg->rb.kbps.cir_kbps == 0) || (cfg->rb.kbps.eir_kbps == 0) || (cfg->rb.kbps.eir_kbps < cfg->rb.kbps.cir_kbps) || + (cfg->rb.kbps.cb_bytes == 0) || (cfg->rb.kbps.eb_bytes == 0)) { + SSE2_QOS_DEBUG_ERROR("Config parameter validation failed."); + return(Not_OK); + } + + if (compute_policer_params(hz, + (uint64_t)cfg->rb.kbps.cir_kbps * BYTES_PER_KBIT, + (uint64_t)cfg->rb.kbps.eir_kbps * BYTES_PER_KBIT, + &hw->current_limit, + &hw->extended_limit, + &hw->cir_tokens_per_period, + &hw->pir_tokens_per_period, + &hw->scale)) { + SSE2_QOS_DEBUG_ERROR("Policer parameter computation failed."); + return(Not_OK); + } + + } else { + 
SSE2_QOS_DEBUG_ERROR("Config parameter validation failed. RFC not supported"); + return(Not_OK); + } + + hw->current_bucket = hw->current_limit; + hw->extended_bucket = hw->extended_limit; + + return OK_pushHW; +} +#endif + + +/* + * Input: configured parameters in 'cfg'. + * Output: physical structure is returned in 'phys', + * Return: Status, success or failure code. + */ +trans_layer_rc +sse2_pol_logical_2_physical (sse2_qos_pol_cfg_params_st *cfg, + policer_read_response_type_st *phys) +{ + trans_layer_rc rc; + sse2_qos_pol_hw_params_st pol_hw; + sse2_qos_pol_cfg_params_st kbps_cfg; + + memset(phys, 0, sizeof(policer_read_response_type_st)); + memset(&kbps_cfg, 0, sizeof(sse2_qos_pol_cfg_params_st)); + + if (!cfg) { + SSE2_QOS_DEBUG_ERROR("Illegal parameters"); + return(Not_OK); + } + + switch (cfg->rate_type) { + case SSE2_QOS_RATE_KBPS: + /* copy all the data into kbps_cfg */ + kbps_cfg.rb.kbps.cir_kbps = cfg->rb.kbps.cir_kbps; + kbps_cfg.rb.kbps.eir_kbps = cfg->rb.kbps.eir_kbps; + kbps_cfg.rb.kbps.cb_bytes = cfg->rb.kbps.cb_bytes; + kbps_cfg.rb.kbps.eb_bytes = cfg->rb.kbps.eb_bytes; + break; + case SSE2_QOS_RATE_PPS: + kbps_cfg.rb.kbps.cir_kbps = + sse2_qos_convert_pps_to_kbps(cfg->rb.pps.cir_pps); + kbps_cfg.rb.kbps.eir_kbps = + sse2_qos_convert_pps_to_kbps(cfg->rb.pps.eir_pps); + kbps_cfg.rb.kbps.cb_bytes = sse2_qos_convert_burst_ms_to_bytes( + (uint32_t) cfg->rb.pps.cb_ms, kbps_cfg.rb.kbps.cir_kbps); + kbps_cfg.rb.kbps.eb_bytes = sse2_qos_convert_burst_ms_to_bytes( + (uint32_t) cfg->rb.pps.eb_ms, kbps_cfg.rb.kbps.eir_kbps); + break; + default: + SSE2_QOS_DEBUG_ERROR("Illegal rate type"); + return(Not_OK); + } + + /* rate type is now converted to kbps */ + kbps_cfg.rate_type = SSE2_QOS_RATE_KBPS; + kbps_cfg.rnd_type = cfg->rnd_type; + kbps_cfg.rfc = cfg->rfc; + +#if !defined (INTERNAL_SS) && !defined (X86) + // convert logical into hw params which involves qos calculations + rc = sse2_pol_compute_hw_params(&kbps_cfg, &pol_hw); + if (rc == Not_OK) { + SSE2_QOS_DEBUG_ERROR("Unable to compute hw param. Error: %d", rc); + return (rc); + } + + // convert hw params into the physical + phys->rfc = pol_hw.rfc; + phys->an = pol_hw.allow_negative; + phys->rexp = pol_hw.rate_exp; + phys->arm = pol_hw.avg_rate_man; + phys->prm = pol_hw.peak_rate_man; + phys->cble = pol_hw.comm_bkt_limit_exp; + phys->cblm = pol_hw.comm_bkt_limit_man; + phys->eble = pol_hw.extd_bkt_limit_exp; + phys->eblm = pol_hw.extd_bkt_limit_man; + phys->cb = pol_hw.comm_bkt; + phys->eb = pol_hw.extd_bkt; + + /* for debugging purposes, the bucket token values can be overwritten */ + if (cfg->overwrite_bucket) { + phys->cb = cfg->current_bucket; + phys->eb = cfg->extended_bucket; + } +#else + // convert logical into hw params which involves qos calculations + rc = x86_pol_compute_hw_params(&kbps_cfg, phys); + if (rc == Not_OK) { + SSE2_QOS_DEBUG_ERROR("Unable to compute hw param. 
Error: %d", rc); + return (rc); + } + + /* for debugging purposes, the bucket token values can be overwritten */ + if (cfg->overwrite_bucket) { + phys->current_bucket = cfg->current_bucket; + phys->extended_bucket = cfg->extended_bucket; + } + + // Touch to avoid compiler warning for X86 + pol_hw.allow_negative = pol_hw.allow_negative; + +#endif // if !defined (INTERNAL_SS) && !defined (X86) + + return OK_pushHW; +} + + +static void +sse2_qos_convert_pol_bucket_to_hw_fmt ( + policer_read_response_type_st *bkt, + sse2_qos_pol_hw_params_st *hw_fmt) +{ + memset(hw_fmt, 0, sizeof(sse2_qos_pol_hw_params_st)); +#if !defined (INTERNAL_SS) && !defined (X86) + hw_fmt->rfc = (uint8_t)bkt->rfc; + hw_fmt->allow_negative = (uint8_t)bkt->an; + hw_fmt->rate_exp = (uint8_t)bkt->rexp; + hw_fmt->avg_rate_man = (uint16_t)bkt->arm; + hw_fmt->peak_rate_man = (uint16_t)bkt->prm; + hw_fmt->comm_bkt_limit_man = (uint8_t)bkt->cblm; + hw_fmt->comm_bkt_limit_exp = (uint8_t)bkt->cble; + hw_fmt->extd_bkt_limit_man = (uint8_t)bkt->eblm; + hw_fmt->extd_bkt_limit_exp = (uint8_t)bkt->eble; + hw_fmt->extd_bkt = bkt->eb; + hw_fmt->comm_bkt = bkt->cb; +#endif // if !defined (INTERNAL_SS) && !defined (X86) +} + +/* + * Input: h/w programmable parameter values in 'hw' + * Output: configured parameter values in 'cfg' + * Return: Status, success or failure code. + */ +static cerrno +sse2_pol_convert_hw_to_cfg_params (sse2_qos_pol_hw_params_st *hw, + sse2_qos_pol_cfg_params_st *cfg) +{ + uint64_t temp_rate; + + if ((hw == NULL) || (cfg == NULL)) { + return EINVAL; + } + + if ((hw->rfc == IPE_RFC_RFC4115) && + !(hw->peak_rate_man << hw->rate_exp) && + !(hw->extd_bkt_limit_man)) { + /* + * For a 1R2C, we set EIR = 0, EB = 0 + */ + cfg->rfc = SSE2_QOS_POLICER_TYPE_1R2C; + } else if (hw->rfc == IPE_RFC_RFC2697) { + cfg->rfc = SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697; + } else if (hw->rfc == IPE_RFC_RFC2698) { + cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698; + } else if (hw->rfc == IPE_RFC_RFC4115) { + cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115; + } else if (hw->rfc == IPE_RFC_MEF5CF1) { + cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1; + } else { + return EINVAL; + } + + temp_rate = ((hw->avg_rate_man << hw->rate_exp) * 8LL * + SSE2_QOS_POL_TICKS_PER_SEC)/1000; + cfg->rb.kbps.cir_kbps = (uint32_t)temp_rate; + + temp_rate = ((hw->peak_rate_man << hw->rate_exp) * 8LL * + SSE2_QOS_POL_TICKS_PER_SEC)/1000; + cfg->rb.kbps.eir_kbps = (uint32_t)temp_rate; + + cfg->rb.kbps.cb_bytes = ((uint64_t)hw->comm_bkt_limit_man << + (uint64_t)hw->comm_bkt_limit_exp); + cfg->rb.kbps.eb_bytes = ((uint64_t)hw->extd_bkt_limit_man << + (uint64_t)hw->extd_bkt_limit_exp); + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) { + /* + * For 1R3C in the hardware, EB = sum(CB, EB). Also, EIR = CIR. 
Restore + * values such that the configured params don't reflect this adjustment + */ + cfg->rb.kbps.eb_bytes = (cfg->rb.kbps.eb_bytes - + cfg->rb.kbps.cb_bytes); + cfg->rb.kbps.eir_kbps = 0; + } else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) { + /* + * For 4115 in the hardware is excess rate and burst, but EA provides + * peak-rate, so adjust it to be eir + */ + cfg->rb.kbps.eir_kbps += cfg->rb.kbps.cir_kbps; + cfg->rb.kbps.eb_bytes += cfg->rb.kbps.cb_bytes; + } + /* h/w conversion to cfg is in kbps */ + cfg->rate_type = SSE2_QOS_RATE_KBPS; + cfg->overwrite_bucket = 0; + cfg->current_bucket = hw->comm_bkt; + cfg->extended_bucket = hw->extd_bkt; + + SSE2_QOS_DEBUG_INFO("configured params, cir: %u kbps, eir: %u kbps, cb " + "burst: 0x%llx bytes, eb burst: 0x%llx bytes", + cfg->rb.kbps.cir_kbps, cfg->rb.kbps.eir_kbps, + cfg->rb.kbps.cb_bytes, cfg->rb.kbps.eb_bytes); + SSE2_QOS_TR_INFO(SSE2_QOS_TP_INFO_22, cfg->rb.kbps.cir_kbps, + cfg->rb.kbps.eir_kbps, + (uint)cfg->rb.kbps.cb_bytes, (uint)cfg->rb.kbps.eb_bytes); + + return EOK; +} + +uint32_t +sse2_qos_convert_kbps_to_pps (uint32_t rate_kbps) +{ + uint64_t numer, denom, rnd_value = 0; + + // sse_qosrm_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT, + // SSE2_QOS_SHIP_CNT_POL_CONV_KBPS_TO_PPS); + + numer = (uint64_t)((uint64_t)rate_kbps * 1000LL); + denom = (uint64_t)((uint64_t)SSE2_QOS_POLICER_FIXED_PKT_SIZE * 8LL); + + (void)sse2_qos_pol_round(numer, denom, &rnd_value, + SSE2_QOS_ROUND_TO_CLOSEST); + + return((uint32_t)rnd_value); +} + +uint32_t +sse2_qos_convert_burst_bytes_to_ms (uint64_t burst_bytes, + uint32_t rate_kbps) +{ + uint64_t numer, denom, rnd_value = 0; + + //sse_qosrm_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT, + // SSE2_QOS_SHIP_CNT_POL_CONV_BYTES_TO_BURST_MS); + + numer = burst_bytes * 8LL; + denom = (uint64_t)rate_kbps; + + (void)sse2_qos_pol_round(numer, denom, &rnd_value, + SSE2_QOS_ROUND_TO_CLOSEST); + + return((uint32_t)rnd_value); +} + +/* + * Input: physical structure in 'phys', rate_type in cfg + * Output: configured parameters in 'cfg'. + * Return: Status, success or failure code. + */ +trans_layer_rc +sse2_pol_physical_2_logical (policer_read_response_type_st *phys, + sse2_qos_pol_cfg_params_st *cfg) +{ + cerrno rc; + sse2_qos_pol_hw_params_st pol_hw; + sse2_qos_pol_cfg_params_st kbps_cfg; + + memset(&pol_hw, 0, sizeof(sse2_qos_pol_hw_params_st)); + memset(&kbps_cfg, 0, sizeof(sse2_qos_pol_cfg_params_st)); + + if (!phys) { + SSE2_QOS_DEBUG_ERROR("Illegal parameters"); + return(Not_OK); + } + + sse2_qos_convert_pol_bucket_to_hw_fmt (phys, &pol_hw); + + rc = sse2_pol_convert_hw_to_cfg_params(&pol_hw, &kbps_cfg); + if (CERR_IS_NOTOK(rc)) { + SSE2_QOS_DEBUG_ERROR("Unable to convert hw params to config params. 
" + "Error: %d", rc); + return(Not_OK); + } + + /* check what rate type is required */ + switch (cfg->rate_type) { + case SSE2_QOS_RATE_KBPS: + /* copy all the data into kbps_cfg */ + cfg->rb.kbps.cir_kbps = kbps_cfg.rb.kbps.cir_kbps; + cfg->rb.kbps.eir_kbps = kbps_cfg.rb.kbps.eir_kbps; + cfg->rb.kbps.cb_bytes = kbps_cfg.rb.kbps.cb_bytes; + cfg->rb.kbps.eb_bytes = kbps_cfg.rb.kbps.eb_bytes; + break; + case SSE2_QOS_RATE_PPS: + cfg->rb.pps.cir_pps = + sse2_qos_convert_kbps_to_pps(kbps_cfg.rb.kbps.cir_kbps); + cfg->rb.pps.eir_pps = + sse2_qos_convert_kbps_to_pps(kbps_cfg.rb.kbps.eir_kbps); + cfg->rb.pps.cb_ms = sse2_qos_convert_burst_bytes_to_ms( + kbps_cfg.rb.kbps.cb_bytes, kbps_cfg.rb.kbps.cir_kbps); + cfg->rb.pps.eb_ms = sse2_qos_convert_burst_bytes_to_ms( + kbps_cfg.rb.kbps.eb_bytes, kbps_cfg.rb.kbps.eir_kbps); + break; + default: + SSE2_QOS_DEBUG_ERROR("Illegal rate type"); + return(Not_OK); + } + + /* cfg->rate_type remains what it was */ + cfg->rnd_type = kbps_cfg.rnd_type; + cfg->rfc = kbps_cfg.rfc; + cfg->overwrite_bucket = kbps_cfg.overwrite_bucket; + cfg->current_bucket = kbps_cfg.current_bucket; + cfg->extended_bucket = kbps_cfg.extended_bucket; + + return OK_pushHW; +} diff --git a/vnet/vnet/policer/xlate.h b/vnet/vnet/policer/xlate.h new file mode 100644 index 00000000000..b41666c897e --- /dev/null +++ b/vnet/vnet/policer/xlate.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*--------------------------------------------------------------------------- + * from gdp_logical_qos.h + *--------------------------------------------------------------------------- + */ + +#ifndef __included_xlate_h__ +#define __included_xlate_h__ + +#include <vnet/policer/fix_types.h> +#include <vnet/policer/police.h> + +/* + * edt: * enum sse2_qos_policer_type_en + * Defines type of policer to be allocated + */ +typedef enum sse2_qos_policer_type_en_ { + SSE2_QOS_POLICER_TYPE_1R2C = 0, + SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697 = 1, + SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698 = 2, + SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115 = 3, + SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1 = 4, + SSE2_QOS_POLICER_TYPE_MAX +} sse2_qos_policer_type_en; + +/* + * edt: * enum + * Enum used to define type of rounding used when calculating policer values + */ +typedef enum { + SSE2_QOS_ROUND_TO_CLOSEST = 0, + SSE2_QOS_ROUND_TO_UP, + SSE2_QOS_ROUND_TO_DOWN, + SSE2_QOS_ROUND_INVALID +} sse2_qos_round_type_en; + +/* + * edt: * enum + * Enum used to define type of rate for configuration, either pps or kbps. + * If kbps, then burst is in bytes, if pps, then burst is in ms. + * + * Default of zero is kbps, which is inline with how it is programmed + * in actual hardware. However, the warning is that this is reverse logic + * of units_in_bits field in sse2_static_policer_parameters_st, which is + * inline with sse_punt_drop.h. 
+ */ +typedef enum { + SSE2_QOS_RATE_KBPS = 0, + SSE2_QOS_RATE_PPS, + SSE2_QOS_RATE_INVALID +} sse2_qos_rate_type_en; + +/* + * edt: * struct sse2_qos_pol_cfg_params_st + * + * Description: + * This structure is used to hold user configured policing parameters. + * + * element: cir_kbps + * CIR in kbps. + * element: eir_kbps + * EIR or PIR in kbps. + * element: cb_bytes + * Committed Burst in bytes. + * element: eb_bytes + * Excess or Peak Burst in bytes. + * element: cir_pps + * CIR in pps. + * element: eir_pps + * EIR or PIR in pps. + * element: cb_ms + * Committed Burst in milliseconds. + * element: eb_ms + * Excess or Peak Burst in milliseconds. + * element: rate_type + * Indicates the union if in kbps/bytes or pps/ms. + * element: rfc + * Policer algorithm - 1R2C, 1R3C (2697), 2R3C (2698) or 2R3C (4115). See + * sse_qos_policer_type_en + * element: rnd_type + * Rounding type (see sse_qos_round_type_en). Needed when policer values + * need to be rounded. Caller can decide on type of rounding used + */ +typedef struct sse2_qos_pol_cfg_params_st_ { + union { + struct { + uint32_t cir_kbps; + uint32_t eir_kbps; + uint64_t cb_bytes; + uint64_t eb_bytes; + } PACKED kbps; + struct { + uint32_t cir_pps; + uint32_t eir_pps; + uint64_t cb_ms; + uint64_t eb_ms; + } PACKED pps; + } PACKED rb; /* rate burst config */ + uint8_t rate_type; /* sse2_qos_rate_type_en */ + uint8_t rnd_type; /* sse2_qos_round_type_en */ + uint8_t rfc; /* sse2_qos_policer_type_en */ + uint8_t overwrite_bucket; /* for debugging purposes */ + uint32_t current_bucket; /* for debugging purposes */ + uint32_t extended_bucket; /* for debugging purposes */ +} sse2_qos_pol_cfg_params_st; + + +typedef struct sse2_qos_pol_hw_params_st_ { + uint8_t rfc; + uint8_t allow_negative; + uint8_t rate_exp; + uint16_t avg_rate_man; + uint16_t peak_rate_man; + uint8_t comm_bkt_limit_exp; + uint8_t comm_bkt_limit_man; + uint8_t extd_bkt_limit_exp; + uint8_t extd_bkt_limit_man; + uint32_t comm_bkt; + uint32_t extd_bkt; +} sse2_qos_pol_hw_params_st; + + +trans_layer_rc +sse2_pol_logical_2_physical (sse2_qos_pol_cfg_params_st *cfg, + policer_read_response_type_st *phys); + + +#endif /* __included_xlate_h__ */ diff --git a/vnet/vnet/ppp/error.def b/vnet/vnet/ppp/error.def new file mode 100644 index 00000000000..ba645408582 --- /dev/null +++ b/vnet/vnet/ppp/error.def @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ppp_error.def: ppp errors + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +ppp_error (NONE, "no error") +ppp_error (UNKNOWN_PROTOCOL, "unknown ppp protocol") +ppp_error (UNKNOWN_ADDRESS_CONTROL, "address, control != 0xff03") diff --git a/vnet/vnet/ppp/node.c b/vnet/vnet/ppp/node.c new file mode 100644 index 00000000000..4f813732df2 --- /dev/null +++ b/vnet/vnet/ppp/node.c @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ppp_node.c: ppp packet processing + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ppp/ppp.h> +#include <vppinfra/sparse_vec.h> + +#define foreach_ppp_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") + +typedef enum { +#define _(s,n) PPP_INPUT_NEXT_##s, + foreach_ppp_input_next +#undef _ + PPP_INPUT_N_NEXT, +} ppp_input_next_t; + +typedef struct { + u8 packet_data[32]; +} ppp_input_trace_t; + +static u8 * format_ppp_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ppp_input_trace_t * t = va_arg (*va, ppp_input_trace_t *); + + s = format (s, "%U", format_ppp_header, t->packet_data); + + return s; +} + +typedef struct { + /* Sparse vector mapping ppp protocol in network byte order + to next index. */ + u16 * next_by_protocol; + + u32 * sparse_index_by_next_index; +} ppp_input_runtime_t; + +static uword +ppp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ppp_input_runtime_t * rt = (void *) node->runtime_data; + u32 n_left_from, next_index, i_next, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (ppp_input_trace_t)); + + next_index = node->cached_next_index; + i_next = vec_elt (rt->sparse_index_by_next_index, next_index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ppp_header_t * h0, * h1; + u32 i0, i1, protocol0, protocol1, enqueue_code; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = (void *) (b0->data + b0->current_data); + h1 = (void *) (b1->data + b1->current_data); + + b0->current_data += sizeof (h0[0]); + b1->current_data += sizeof (h1[0]); + + b0->current_length -= sizeof (h0[0]); + b1->current_length -= sizeof (h1[0]); + + /* Index sparse array with network byte order. */ + protocol0 = h0->protocol; + protocol1 = h1->protocol; + sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1, &i0, &i1); + + b0->error = node->errors[i0 == SPARSE_VEC_INVALID_INDEX ? PPP_ERROR_UNKNOWN_PROTOCOL : PPP_ERROR_NONE]; + b1->error = node->errors[i1 == SPARSE_VEC_INVALID_INDEX ? 
PPP_ERROR_UNKNOWN_PROTOCOL : PPP_ERROR_NONE]; + + enqueue_code = (i0 != i_next) + 2*(i1 != i_next); + + if (PREDICT_FALSE (enqueue_code != 0)) + { + switch (enqueue_code) + { + case 1: + /* A B A */ + to_next[-2] = bi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i0), bi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i1), bi1); + break; + + case 3: + /* A B B or A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i0), bi0); + vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i1), bi1); + if (i0 == i1) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + i_next = i1; + next_index = vec_elt (rt->next_by_protocol, i_next); + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + ppp_header_t * h0; + u32 i0, protocol0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = (void *) (b0->data + b0->current_data); + + b0->current_data += sizeof (h0[0]); + b0->current_length -= sizeof (h0[0]); + + protocol0 = h0->protocol; + i0 = sparse_vec_index (rt->next_by_protocol, protocol0); + + b0->error = node->errors[i0 == SPARSE_VEC_INVALID_INDEX ? PPP_ERROR_UNKNOWN_PROTOCOL : PPP_ERROR_NONE]; + + /* Sent packet to wrong next? */ + if (PREDICT_FALSE (i0 != i_next)) + { + /* Return old frame; remove incorrectly enqueued packet. */ + vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); + + /* Send to correct next. */ + i_next = i0; + next_index = vec_elt (rt->next_by_protocol, i_next); + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static char * ppp_error_strings[] = { +#define ppp_error(n,s) s, +#include "error.def" +#undef ppp_error +}; + +VLIB_REGISTER_NODE (ppp_input_node) = { + .function = ppp_input, + .name = "ppp-input", + /* Takes a vector of packets. 
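+
+     Protocol dispatch is table-driven: handlers added at run time through
+     ppp_register_input_protocol() (defined later in this file) get their own
+     next index in the sparse next_by_protocol vector, and unregistered
+     protocols are counted as PPP_ERROR_UNKNOWN_PROTOCOL. As an illustrative
+     sketch only (the registering node is an assumption, not something this
+     file sets up), an IPv4 handler would register along the lines of
+
+       ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4, ip4_input_node.index);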
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (ppp_input_runtime_t), + + .n_errors = PPP_N_ERROR, + .error_strings = ppp_error_strings, + + .n_next_nodes = PPP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [PPP_INPUT_NEXT_##s] = n, + foreach_ppp_input_next +#undef _ + }, + + .format_buffer = format_ppp_header_with_length, + .format_trace = format_ppp_input_trace, + .unformat_buffer = unformat_ppp_header, +}; + +static clib_error_t * ppp_input_init (vlib_main_t * vm) +{ + ppp_input_runtime_t * rt; + + { + clib_error_t * error = vlib_call_init_function (vm, ppp_init); + if (error) + clib_error_report (error); + } + + ppp_setup_node (vm, ppp_input_node.index); + + rt = vlib_node_get_runtime_data (vm, ppp_input_node.index); + + rt->next_by_protocol = sparse_vec_new + (/* elt bytes */ sizeof (rt->next_by_protocol[0]), + /* bits in index */ BITS (((ppp_header_t *) 0)->protocol)); + + vec_validate (rt->sparse_index_by_next_index, PPP_INPUT_NEXT_DROP); + vec_validate (rt->sparse_index_by_next_index, PPP_INPUT_NEXT_PUNT); + rt->sparse_index_by_next_index[PPP_INPUT_NEXT_DROP] + = SPARSE_VEC_INVALID_INDEX; + rt->sparse_index_by_next_index[PPP_INPUT_NEXT_PUNT] + = SPARSE_VEC_INVALID_INDEX; + + return 0; +} + +VLIB_INIT_FUNCTION (ppp_input_init); + +void +ppp_register_input_protocol (vlib_main_t * vm, + ppp_protocol_t protocol, + u32 node_index) +{ + ppp_main_t * em = &ppp_main; + ppp_protocol_info_t * pi; + ppp_input_runtime_t * rt; + u16 * n; + u32 i; + + { + clib_error_t * error = vlib_call_init_function (vm, ppp_input_init); + if (error) + clib_error_report (error); + } + + pi = ppp_get_protocol_info (em, protocol); + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + ppp_input_node.index, + node_index); + + /* Setup ppp protocol -> next index sparse vector mapping. */ + rt = vlib_node_get_runtime_data (vm, ppp_input_node.index); + n = sparse_vec_validate (rt->next_by_protocol, clib_host_to_net_u16 (protocol)); + n[0] = pi->next_index; + + /* Rebuild next index -> sparse index inverse mapping when sparse vector + is updated. */ + vec_validate (rt->sparse_index_by_next_index, pi->next_index); + for (i = 1; i < vec_len (rt->next_by_protocol); i++) + rt->sparse_index_by_next_index[rt->next_by_protocol[i]] = i; +} diff --git a/vnet/vnet/ppp/packet.h b/vnet/vnet/ppp/packet.h new file mode 100644 index 00000000000..da034daecfa --- /dev/null +++ b/vnet/vnet/ppp/packet.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_ppp_packet_h +#define included_vnet_ppp_packet_h + +/* + * PPP packet format + * + * Copyright (c) 2009 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* +See http://www.iana.org/assignments/ppp-numbers. + +The Point-to-Point Protocol (PPP) Data Link Layer [146,147,175] +contains a 16 bit Protocol field to identify the the encapsulated +protocol. The Protocol field is consistent with the ISO 3309 (HDLC) +extension mechanism for Address fields. All Protocols MUST be +assigned such that the least significant bit of the most significant +octet equals "0", and the least significant bit of the least +significant octet equals "1". +*/ + +#define foreach_ppp_protocol \ +_ (0x0001, padding) \ +_ (0x0003, rohc_small_cid) \ +_ (0x0005, rohc_large_cid) \ +_ (0x0021, ip4) \ +_ (0x0023, osi) \ +_ (0x0025, xerox_ns_idp) \ +_ (0x0027, decnet) \ +_ (0x0029, appletalk) \ +_ (0x002b, ipx) \ +_ (0x002d, vj_compressed_tcp) \ +_ (0x002f, vj_uncompressed_tcp) \ +_ (0x0031, bpdu) \ +_ (0x0033, streams) \ +_ (0x0035, vines) \ +_ (0x0039, appletalk_eddp) \ +_ (0x003b, appletalk_smart_buffered) \ +_ (0x003d, multilink) \ +_ (0x003f, netbios_framing) \ +_ (0x0041, cisco) \ +_ (0x0043, timeplex) \ +_ (0x0045, fujitsu_lblb) \ +_ (0x0047, dca_remote_lan) \ +_ (0x0049, sdtp) \ +_ (0x004b, sna_over_802_2) \ +_ (0x004d, sna) \ +_ (0x004f, ip6_header_compression) \ +_ (0x0051, knx) \ +_ (0x0053, encryption) \ +_ (0x0055, link_encryption) \ +_ (0x0057, ip6) \ +_ (0x0059, ppp_mux) \ +_ (0x005b, vendor_specific_a) \ +_ (0x0061, rtp_iphc_full_header) \ +_ (0x0063, rtp_iphc_compressed_tcp) \ +_ (0x0065, rtp_iphc_compressed_non_tcp) \ +_ (0x0067, rtp_iphc_compressed_udp_8) \ +_ (0x0069, rtp_iphc_compressed_rtp_8) \ +_ (0x006f, stampede) \ +_ (0x0073, mp_plus) \ +_ (0x007d, control) \ +_ (0x00c1, ntcits_ipi) \ +_ (0x00cf, ppp_nlpid) \ +_ (0x00fb, multilink_compression) \ +_ (0x00fd, compressed_datagram) \ +_ (0x0201, 802_1d_hello) \ +_ (0x0203, ibm_source_routing) \ +_ (0x0205, dec_lanbridge) \ +_ (0x0207, cdp) \ +_ (0x0209, netcs) \ +_ (0x020b, stp) \ +_ (0x020d, edp) \ +_ (0x0211, oscp_a) \ +_ (0x0213, oscp_b) \ +_ (0x0231, luxcom) \ +_ (0x0233, sigma) \ +_ (0x0235, apple_client_server) \ +_ (0x0281, mpls_unicast) \ +_ (0x0283, mpls_multicast) \ +_ (0x0285, ieee_p1284_4) \ +_ (0x0287, tetra) \ +_ (0x0289, multichannel_flow_treatment) \ +_ (0x2063, rtp_iphc_compressed_tcp_no_delta) \ +_ (0x2065, rtp_iphc_context_state) \ +_ (0x2067, 
rtp_iphc_compressed_udp_16) \ +_ (0x2069, rtp_iphc_compressed_rtp_16) \ +_ (0x4001, cray) \ +_ (0x4003, cdpd) \ +_ (0x4005, expand) \ +_ (0x4007, odsicp) \ +_ (0x4009, docsis_dll) \ +_ (0x400B, cetacean) \ +_ (0x4021, lzs) \ +_ (0x4023, reftek) \ +_ (0x4025, fibre_channel) \ +_ (0x4027, emit) \ +_ (0x405b, vendor_specific_b) \ +_ (0xc021, lcp) \ +_ (0xc023, pap) \ +_ (0xc025, link_quality_report) \ +_ (0xc027, shiva_password) \ +_ (0xc029, cbcp) \ +_ (0xc02b, bacp) \ +_ (0xc02d, bap) \ +_ (0xc05b, vendor_specific_password) \ +_ (0xc081, container_control) \ +_ (0xc223, chap) \ +_ (0xc225, rsa) \ +_ (0xc227, extensible_authentication) \ +_ (0xc229, mitsubishi_security_info) \ +_ (0xc26f, stampede_authorization) \ +_ (0xc281, proprietary_authentication_a) \ +_ (0xc283, proprietary_authentication_b) \ +_ (0xc481, proprietary_node_id_authentication) + +typedef enum { +#define _(n,f) PPP_PROTOCOL_##f = n, + foreach_ppp_protocol +#undef _ +} ppp_protocol_t; + +/* PPP Link Control Protocol (LCP) and Internet Protocol Control Protocol (IPCP) Codes + +The Point-to-Point Protocol (PPP) Link Control Protocol (LCP), +the Compression Control Protocol (CCP), Internet Protocol Control +Protocol (IPCP), and other control protocols, contain an 8 bit +Code field which identifies the type of packet. */ + +#define foreach_ppp_lcp_code \ +_ (0, vendor_specific) \ +_ (1, configure_request) \ +_ (2, configure_ack) \ +_ (3, configure_nak) \ +_ (4, configure_reject) \ +_ (5, terminate_request) \ +_ (6, terminate_ack) \ +_ (7, code_reject) \ +_ (8, protocol_reject) \ +_ (9, echo_request) \ +_ (10, echo_reply) \ +_ (11, discard_request) \ +_ (12, identification) \ +_ (13, time_remaining) \ +_ (14, reset_request) \ +_ (15, reset_reply) + +typedef struct { + /* Set to 0xff 0x03 */ + u8 address, control; + + /* Layer 3 protocol for this packet. */ + u16 protocol; +} ppp_header_t; + +#endif /* included_vnet_ppp_packet_h */ diff --git a/vnet/vnet/ppp/pg.c b/vnet/vnet/ppp/pg.c new file mode 100644 index 00000000000..2662bdc1999 --- /dev/null +++ b/vnet/vnet/ppp/pg.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ppp_pg.c: packet generator ppp interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ppp/ppp.h> + +typedef struct { + pg_edit_t address; + pg_edit_t control; + pg_edit_t protocol; +} pg_ppp_header_t; + +static inline void +pg_ppp_header_init (pg_ppp_header_t * e) +{ + pg_edit_init (&e->address, ppp_header_t, address); + pg_edit_init (&e->control, ppp_header_t, control); + pg_edit_init (&e->protocol, ppp_header_t, protocol); +} + +uword +unformat_pg_ppp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_ppp_header_t * h; + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (ppp_header_t), + &group_index); + pg_ppp_header_init (h); + + pg_edit_set_fixed (&h->address, 0xff); + pg_edit_set_fixed (&h->control, 0x03); + + error = 1; + if (! unformat (input, "%U", + unformat_pg_edit, + unformat_ppp_protocol_net_byte_order, &h->protocol)) + goto done; + + { + ppp_main_t * pm = &ppp_main; + ppp_protocol_info_t * pi = 0; + pg_node_t * pg_node = 0; + + if (h->protocol.type == PG_EDIT_FIXED) + { + u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO]; + pi = ppp_get_protocol_info (pm, clib_net_to_host_u16 (t)); + if (pi && pi->node_index != ~0) + pg_node = pg_get_node (pi->node_index); + } + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto done; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/ppp/ppp.c b/vnet/vnet/ppp/ppp.c new file mode 100644 index 00000000000..1537c9a50c3 --- /dev/null +++ b/vnet/vnet/ppp/ppp.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ppp.c: ppp support + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/ppp/ppp.h> + +/* Global main structure. */ +ppp_main_t ppp_main; + +u8 * format_ppp_protocol (u8 * s, va_list * args) +{ + ppp_protocol_t p = va_arg (*args, u32); + ppp_main_t * pm = &ppp_main; + ppp_protocol_info_t * pi = ppp_get_protocol_info (pm, p); + + if (pi) + s = format (s, "%s", pi->name); + else + s = format (s, "0x%04x", p); + + return s; +} + +u8 * format_ppp_header_with_length (u8 * s, va_list * args) +{ + ppp_main_t * pm = &ppp_main; + ppp_header_t * h = va_arg (*args, ppp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + ppp_protocol_t p = clib_net_to_host_u16 (h->protocol); + uword indent, header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "ppp header truncated"); + + indent = format_get_indent (s); + + s = format (s, "PPP %U", format_ppp_protocol, p); + + if (h->address != 0xff) + s = format (s, ", address 0x%02x", h->address); + if (h->control != 0x03) + s = format (s, ", control 0x%02x", h->control); + + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + { + ppp_protocol_info_t * pi = ppp_get_protocol_info (pm, p); + vlib_node_t * node = vlib_get_node (pm->vlib_main, pi->node_index); + if (node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) (h + 1), + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * format_ppp_header (u8 * s, va_list * args) +{ + ppp_header_t * h = va_arg (*args, ppp_header_t *); + return format (s, "%U", format_ppp_header_with_length, h, 0); +} + +/* Returns ppp protocol as an int in host byte order. */ +uword +unformat_ppp_protocol_host_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + ppp_main_t * pm = &ppp_main; + int p, i; + + /* Numeric type. */ + if (unformat (input, "0x%x", &p) + || unformat (input, "%d", &p)) + { + if (p >= (1 << 16)) + return 0; + *result = p; + return 1; + } + + /* Named type. */ + if (unformat_user (input, unformat_vlib_number_by_name, + pm->protocol_info_by_name, &i)) + { + ppp_protocol_info_t * pi = vec_elt_at_index (pm->protocol_infos, i); + *result = pi->protocol; + return 1; + } + + return 0; +} + +uword +unformat_ppp_protocol_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 * result = va_arg (*args, u16 *); + if (! unformat_user (input, unformat_ppp_protocol_host_byte_order, result)) + return 0; + *result = clib_host_to_net_u16 ((u16) *result); + return 1; +} + +uword +unformat_ppp_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + ppp_header_t _h, * h = &_h; + u16 p; + + if (! unformat (input, "%U", + unformat_ppp_protocol_host_byte_order, &p)) + return 0; + + h->address = 0xff; + h->control = 0x03; + h->protocol = clib_host_to_net_u16 (p); + + /* Add header to result. 
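+
+     For instance, parsing "ip4" (protocol 0x0021) appends the four header
+     bytes ff 03 00 21: address 0xff, control 0x03, then the protocol in
+     network byte order.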
*/ + { + void * p; + u32 n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, n_bytes); + memcpy (p, h, n_bytes); + } + + return 1; +} + +static uword ppp_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + ppp_header_t * h = rewrite; + ppp_protocol_t protocol; + + if (max_rewrite_bytes < sizeof (h[0])) + return 0; + + switch (l3_type) { +#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = PPP_PROTOCOL_##b; break + _ (IP4, ip4); + _ (IP6, ip6); + _ (MPLS_UNICAST, mpls_unicast); + _ (MPLS_MULTICAST, mpls_multicast); +#undef _ + default: + return 0; + } + + h->address = 0xff; + h->control = 0x03; + h->protocol = clib_host_to_net_u16 (protocol); + + return sizeof (h[0]); +} + +VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = { + .name = "PPP", + .format_header = format_ppp_header_with_length, + .unformat_header = unformat_ppp_header, + .set_rewrite = ppp_set_rewrite, +}; + +static void add_protocol (ppp_main_t * pm, + ppp_protocol_t protocol, + char * protocol_name) +{ + ppp_protocol_info_t * pi; + u32 i; + + vec_add2 (pm->protocol_infos, pi, 1); + i = pi - pm->protocol_infos; + + pi->name = protocol_name; + pi->protocol = protocol; + pi->next_index = pi->node_index = ~0; + + hash_set (pm->protocol_info_by_protocol, protocol, i); + hash_set_mem (pm->protocol_info_by_name, pi->name, i); +} + +static clib_error_t * ppp_init (vlib_main_t * vm) +{ + ppp_main_t * pm = &ppp_main; + + memset (pm, 0, sizeof (pm[0])); + pm->vlib_main = vm; + + pm->protocol_info_by_name = hash_create_string (0, sizeof (uword)); + pm->protocol_info_by_protocol = hash_create (0, sizeof (uword)); + +#define _(n,s) add_protocol (pm, PPP_PROTOCOL_##s, #s); + foreach_ppp_protocol; +#undef _ + + return vlib_call_init_function (vm, ppp_input_init); +} + +VLIB_INIT_FUNCTION (ppp_init); + +ppp_main_t * ppp_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, ppp_init); + return &ppp_main; +} + diff --git a/vnet/vnet/ppp/ppp.h b/vnet/vnet/ppp/ppp.h new file mode 100644 index 00000000000..e512df7f108 --- /dev/null +++ b/vnet/vnet/ppp/ppp.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ppp.h: types/functions for ppp. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ppp_h +#define included_ppp_h + +#include <vnet/vnet.h> +#include <vnet/ppp/packet.h> +#include <vnet/pg/pg.h> + +vnet_hw_interface_class_t ppp_hw_interface_class; + +typedef enum { +#define ppp_error(n,s) PPP_ERROR_##n, +#include <vnet/ppp/error.def> +#undef ppp_error + PPP_N_ERROR, +} ppp_error_t; + +typedef struct { + /* Name (a c string). */ + char * name; + + /* PPP protocol type in host byte order. */ + ppp_protocol_t protocol; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} ppp_protocol_info_t; + +typedef struct { + vlib_main_t * vlib_main; + + ppp_protocol_info_t * protocol_infos; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword * protocol_info_by_name, * protocol_info_by_protocol; +} ppp_main_t; + +always_inline ppp_protocol_info_t * +ppp_get_protocol_info (ppp_main_t * em, ppp_protocol_t protocol) +{ + uword * p = hash_get (em->protocol_info_by_protocol, protocol); + return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0; +} + +extern ppp_main_t ppp_main; + +/* Register given node index to take input for given ppp type. */ +void +ppp_register_input_type (vlib_main_t * vm, + ppp_protocol_t protocol, + u32 node_index); + +void ppp_set_adjacency (vnet_rewrite_header_t * rw, + uword max_data_bytes, + ppp_protocol_t protocol); + +format_function_t format_ppp_protocol; +format_function_t format_ppp_header; +format_function_t format_ppp_header_with_length; + +/* Parse ppp protocol as 0xXXXX or protocol name. + In either host or network byte order. */ +unformat_function_t unformat_ppp_protocol_host_byte_order; +unformat_function_t unformat_ppp_protocol_net_byte_order; + +/* Parse ppp header. */ +unformat_function_t unformat_ppp_header; +unformat_function_t unformat_pg_ppp_header; + +always_inline void +ppp_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + + n->format_buffer = format_ppp_header_with_length; + n->unformat_buffer = unformat_ppp_header; + pn->unformat_edit = unformat_pg_ppp_header; +} + +void +ppp_register_input_protocol (vlib_main_t * vm, + ppp_protocol_t protocol, + u32 node_index); + +#endif /* included_ppp_h */ diff --git a/vnet/vnet/replication.c b/vnet/vnet/replication.c new file mode 100644 index 00000000000..6842684984d --- /dev/null +++ b/vnet/vnet/replication.c @@ -0,0 +1,276 @@ +/* + * replication.c : packet replication + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/replication.h> + + +replication_main_t replication_main; + + +replication_context_t * +replication_prep (vlib_main_t * vm, + vlib_buffer_t * b0, + u32 recycle_node_index, + u32 l2_packet) +{ + replication_main_t * rm = &replication_main; + replication_context_t * ctx; + uword cpu_number = vm->cpu_index; + ip4_header_t * ip; + u32 ctx_id; + + // Allocate a context, reserve context 0 + if (PREDICT_FALSE(rm->contexts[cpu_number] == 0)) + pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES); + + pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES); + ctx_id = ctx - rm->contexts[cpu_number]; + + // Save state from vlib buffer + ctx->saved_clone_count = b0->clone_count; + ctx->saved_free_list_index = b0->free_list_index; + ctx->current_data = b0->current_data; + + // Set up vlib buffer hooks + b0->clone_count = ctx_id; + b0->free_list_index = rm->recycle_list_index; + + // Save feature state + ctx->recycle_node_index = recycle_node_index; + + // Save vnet state + memcpy (ctx->vnet_buffer, vnet_buffer(b0), sizeof(vnet_buffer_opaque_t)); + + // Save packet contents + ctx->l2_packet = l2_packet; + ip = (ip4_header_t *)vlib_buffer_get_current (b0); + if (l2_packet) { + // Save ethernet header + ctx->l2_header[0] = ((u64 *)ip)[0]; + ctx->l2_header[1] = ((u64 *)ip)[1]; + ctx->l2_header[2] = ((u64 *)ip)[2]; + // set ip to the true ip header + ip = (ip4_header_t *)(((u8 *)ip) + vnet_buffer(b0)->l2.l2_len); + } + + // Copy L3 fields. + // We need to save TOS for ip4 and ip6 packets. Fortunately the TOS field is + // in the first two bytes of both the ip4 and ip6 headers. + ctx->ip_tos = *((u16 *)(ip)); + + // Save the ip4 checksum as well. We just blindly save the corresponding two + // bytes even for ip6 packets. + ctx->ip4_checksum = ip->checksum; + + return ctx; +} + + +replication_context_t * +replication_recycle (vlib_main_t * vm, + vlib_buffer_t * b0, + u32 is_last) +{ + replication_main_t * rm = &replication_main; + replication_context_t * ctx; + uword cpu_number = vm->cpu_index; + ip4_header_t * ip; + + // Get access to the replication context + ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->clone_count); + + // Restore vnet buffer state + memcpy (vnet_buffer(b0), ctx->vnet_buffer, sizeof(vnet_buffer_opaque_t)); + + // Restore the packet start (current_data) and length + vlib_buffer_advance(b0, ctx->current_data - b0->current_data); + + // Restore packet contents + ip = (ip4_header_t *)vlib_buffer_get_current (b0); + if (ctx->l2_packet) { + // Restore ethernet header + ((u64 *)ip)[0] = ctx->l2_header[0]; + ((u64 *)ip)[1] = ctx->l2_header[1]; + ((u64 *)ip)[2] = ctx->l2_header[2]; + // set ip to the true ip header + ip = (ip4_header_t *)(((u8 *)ip) + vnet_buffer(b0)->l2.l2_len); + } + + // Restore L3 fields + *((u16 *)(ip)) = ctx->ip_tos; + ip->checksum = ctx->ip4_checksum; + + if (is_last) { + // This is the last replication in the list. + // Restore original buffer free functionality. 
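+    // (replication_prep above borrowed clone_count to carry the context
+    // pool index and pointed free_list_index at the recycle free list;
+    // putting the saved values back lets this final copy be freed along
+    // the normal path.)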
+ b0->clone_count = ctx->saved_clone_count; + b0->free_list_index = ctx->saved_free_list_index; + + // Free context back to its pool + pool_put (rm->contexts[cpu_number], ctx); + } + + return ctx; +} + + + +/* + * fish pkts back from the recycle queue/freelist + * un-flatten the context chains + */ +static void replication_recycle_callback (vlib_main_t *vm, + vlib_buffer_free_list_t * fl) +{ + vlib_frame_t * f = 0; + u32 n_left_from; + u32 n_left_to_next = 0; + u32 n_this_frame = 0; + u32 * from; + u32 * to_next = 0; + u32 bi0, pi0; + vlib_buffer_t *b0; + vlib_buffer_t *bnext0; + int i; + replication_main_t * rm = &replication_main; + replication_context_t * ctx; + u32 feature_node_index = 0; + uword cpu_number = vm->cpu_index; + + // All buffers in the list are destined to the same recycle node. + // Pull the recycle node index from the first buffer. + // Note: this could be sped up if the node index were stuffed into + // the freelist itself. + if (vec_len (fl->aligned_buffers) > 0) { + bi0 = fl->aligned_buffers[0]; + b0 = vlib_get_buffer (vm, bi0); + ctx = pool_elt_at_index (rm->contexts[cpu_number], + b0->clone_count); + feature_node_index = ctx->recycle_node_index; + } else if (vec_len (fl->unaligned_buffers) > 0) { + bi0 = fl->unaligned_buffers[0]; + b0 = vlib_get_buffer (vm, bi0); + ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->clone_count); + feature_node_index = ctx->recycle_node_index; + } + + /* aligned, unaligned buffers */ + for (i = 0; i < 2; i++) + { + if (i == 0) + { + from = fl->aligned_buffers; + n_left_from = vec_len (from); + } + else + { + from = fl->unaligned_buffers; + n_left_from = vec_len (from); + } + + while (n_left_from > 0) + { + if (PREDICT_FALSE(n_left_to_next == 0)) + { + if (f) + { + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, feature_node_index, f); + } + + f = vlib_get_frame_to_node (vm, feature_node_index); + to_next = vlib_frame_vector_args (f); + n_left_to_next = VLIB_FRAME_SIZE; + n_this_frame = 0; + } + + bi0 = from[0]; + if (PREDICT_TRUE(n_left_from > 1)) + { + pi0 = from[1]; + vlib_prefetch_buffer_with_index(vm,pi0,LOAD); + } + + bnext0 = b0 = vlib_get_buffer (vm, bi0); + + // Mark that this buffer was just recycled + b0->flags |= VLIB_BUFFER_IS_RECYCLED; + + // If buffer is traced, mark frame as traced + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + f->flags |= VLIB_FRAME_TRACE; + + while (bnext0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + from += 1; + n_left_from -= 1; + bnext0 = vlib_get_buffer (vm, bnext0->next_buffer); + } + to_next[0] = bi0; + + from++; + to_next++; + n_this_frame++; + n_left_to_next--; + n_left_from--; + } + } + + vec_reset_length (fl->aligned_buffers); + vec_reset_length (fl->unaligned_buffers); + + if (f) + { + ASSERT(n_this_frame); + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, feature_node_index, f); + } +} + + + +clib_error_t *replication_init (vlib_main_t *vm) +{ + replication_main_t * rm = &replication_main; + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * fl; + __attribute__((unused)) replication_context_t * ctx; + vlib_thread_main_t * tm = vlib_get_thread_main(); + + rm->vlib_main = vm; + rm->vnet_main = vnet_get_main(); + rm->recycle_list_index = + vlib_buffer_create_free_list (vm, 1024 /* fictional */, + "replication-recycle"); + + fl = pool_elt_at_index (bm->buffer_free_list_pool, + rm->recycle_list_index); + + fl->buffers_added_to_freelist_function = replication_recycle_callback; + + // Verify the replication context is the expected size + 
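+  // (Two cache lines is also what replication_prefetch_ctx in replication.h
+  //  assumes when it prefetches 2*CLIB_CACHE_LINE_BYTES per context.)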
ASSERT(sizeof(replication_context_t) == 128); // 2 cache lines + + vec_validate (rm->contexts, tm->n_vlib_mains - 1); + return 0; +} + +VLIB_INIT_FUNCTION (replication_init); diff --git a/vnet/vnet/replication.h b/vnet/vnet/replication.h new file mode 100644 index 00000000000..9de5717f4d3 --- /dev/null +++ b/vnet/vnet/replication.h @@ -0,0 +1,123 @@ +/* + * replication.h : packet replication + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_replication_h +#define included_replication_h + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/replication.h> + + +typedef struct { + + // The entire vnet buffer header restored for each replica + u8 vnet_buffer[32]; // 16B aligned to allow vector unit copy + u8 reserved[32]; // space for future expansion of vnet buffer header + + // feature state used during this replication + u64 feature_replicas; // feature's id for its set of replicas + u32 feature_counter; // feature's current index into set of replicas + u32 recycle_node_index; // feature's recycle node index + + // data saved from the start of replication and restored at the end of replication + u32 saved_clone_count; // from vlib buffer + u32 saved_free_list_index; // from vlib buffer + + // data saved from the original packet and restored for each replica + u64 l2_header[3]; // 24B (must be at least 22B for l2 packets) + u16 ip_tos; // v4 and v6 + u16 ip4_checksum; // needed for v4 only + + // data saved from the vlib buffer header and restored for each replica + i16 current_data; // offset of first byte of packet in packet data + u8 pad[6]; // to 64B + u8 l2_packet; // flag for l2 vs l3 packet data + +} replication_context_t; // 128B + + +typedef struct { + + u32 recycle_list_index; + + // per-thread pools of replication contexts + replication_context_t ** contexts; + + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + +} replication_main_t; + + +extern replication_main_t replication_main; + + +// Return 1 if this buffer just came from the replication recycle handler. +always_inline u32 +replication_is_recycled (vlib_buffer_t * b0) +{ + return b0->flags & VLIB_BUFFER_IS_RECYCLED; +} + +// Clear the recycle flag. If buffer came from the replication recycle +// handler, this flag must be cleared before the packet is transmitted again. +always_inline void +replication_clear_recycled (vlib_buffer_t * b0) +{ + b0->flags &= ~VLIB_BUFFER_IS_RECYCLED; +} + +// Return the active replication context if this buffer has +// been recycled, otherwise return 0. (Note that this essentially +// restricts access to the replication context to the replication +// feature's prep and recycle nodes.) +always_inline replication_context_t * +replication_get_ctx (vlib_buffer_t * b0) +{ + replication_main_t * rm = &replication_main; + + return replication_is_recycled (b0) ? 
+ pool_elt_at_index (rm->contexts[os_get_cpu_number()], b0->clone_count) : + 0; +} + +// Prefetch the replication context for this buffer, if it exists +always_inline void +replication_prefetch_ctx (vlib_buffer_t * b0) +{ + replication_context_t *ctx = replication_get_ctx (b0); + + if (ctx) { + CLIB_PREFETCH (ctx, (2*CLIB_CACHE_LINE_BYTES), STORE); + } +} + +replication_context_t * +replication_prep (vlib_main_t * vm, + vlib_buffer_t * b0, + u32 recycle_node_index, + u32 l2_packet); + +replication_context_t * +replication_recycle (vlib_main_t * vm, + vlib_buffer_t * b0, + u32 is_last); + + +#endif diff --git a/vnet/vnet/rewrite.c b/vnet/vnet/rewrite.c new file mode 100644 index 00000000000..6c33a2ebcd8 --- /dev/null +++ b/vnet/vnet/rewrite.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * rewrite.c: packet rewrite + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/ip/lookup.h> + +void vnet_rewrite_copy_slow_path (vnet_rewrite_data_t * p0, + vnet_rewrite_data_t * rw0, + word n_left, + uword most_likely_size) +{ + uword n_done = round_pow2 (most_likely_size, sizeof (rw0[0])) / sizeof (rw0[0]); + + p0 -= n_done; + rw0 -= n_done; + + /* As we enter the cleanup loop, p0 and rw0 point to the last chunk written + by the fast path. Hence, the constant 1, which the + vnet_rewrite_copy_one macro renders as p0[-1] = rw0[-1]. 
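+
+     For example, on a 64-bit build (8-byte vnet_rewrite_data_t words) with
+     most_likely_size = 14, the fast path has already copied words -1 and -2
+     (16 bytes); n_done = round_pow2 (14, 8) / 8 = 2 steps both pointers back
+     past those words, and a 22-byte rewrite then arrives here with
+     n_left = 1, so one more word is copied to cover the remaining 6 bytes.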
*/ + + while (n_left > 0) + { + vnet_rewrite_copy_one (p0, rw0, 1); + p0--; + rw0--; + n_left--; + } +} + +u8 * format_vnet_rewrite (u8 * s, va_list * args) +{ + vlib_main_t * vm = va_arg (*args, vlib_main_t *); + vnet_rewrite_header_t * rw = va_arg (*args, vnet_rewrite_header_t *); + u32 max_data_bytes = va_arg (*args, u32); + vnet_main_t * vnm = vnet_get_main(); + vlib_node_t * next; + uword indent; + + next = vlib_get_next_node (vm, rw->node_index, rw->next_index); + + indent = format_get_indent (s); + + if (rw->sw_if_index != ~0) + { + vnet_sw_interface_t * si; + si = vnet_get_sw_interface (vnm, rw->sw_if_index); + s = format (s, "%U", format_vnet_sw_interface_name, vnm, si); + } + else + s = format (s, "%v", next->name); + + /* Format rewrite string. */ + if (rw->data_bytes > 0) + s = format (s, "\n%U%U", + format_white_space, indent, + next->format_buffer ? next->format_buffer : format_hex_bytes, + rw->data + max_data_bytes - rw->data_bytes, + rw->data_bytes); + + return s; +} + +u8 * format_vnet_rewrite_header (u8 * s, va_list * args) +{ + vlib_main_t * vm = va_arg (*args, vlib_main_t *); + vnet_rewrite_header_t * rw = va_arg (*args, vnet_rewrite_header_t *); + u8 * packet_data = va_arg (*args, u8 *); + u32 packet_data_bytes = va_arg (*args, u32); + vlib_node_t * next; + + next = vlib_get_next_node (vm, rw->node_index, rw->next_index); + + /* Format rewrite string. */ + s = format (s, "%U", + next->format_buffer ? next->format_buffer : format_hex_bytes, + packet_data, packet_data_bytes); + + return s; +} + +uword unformat_vnet_rewrite (unformat_input_t * input, va_list * args) +{ + vlib_main_t * vm = va_arg (*args, vlib_main_t *); + vnet_rewrite_header_t * rw = va_arg (*args, vnet_rewrite_header_t *); + u32 max_data_bytes = va_arg (*args, u32); + vnet_main_t * vnm = vnet_get_main(); + vlib_node_t * next; + u32 next_index, sw_if_index, max_packet_bytes, error; + u8 * rw_data; + + rw_data = 0; + sw_if_index = ~0; + max_packet_bytes = ~0; + error = 1; + + /* Parse sw interface. */ + if (unformat (input, "%U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + vnet_hw_interface_t * hi; + + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + + next_index = hi->output_node_index; + max_packet_bytes = hi->max_l3_packet_bytes[VLIB_RX]; + } + + else if (unformat (input, "%U", + unformat_vlib_node, vm, &next_index)) + ; + + else + goto done; + + next = vlib_get_node (vm, next_index); + + if (next->unformat_buffer + && unformat_user (input, next->unformat_buffer, &rw_data)) + ; + + else if (unformat_user (input, unformat_hex_string, &rw_data) + || unformat (input, "0x%U", unformat_hex_string, &rw_data)) + ; + + else + goto done; + + /* Re-write does not fit. 
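+
+     (The rewrite string is stored right-justified in the header's data[]
+     area of max_data_bytes, and vnet_rewrite_set_data_internal asserts
+     data_bytes < max_size, so a string of that length or longer is
+     rejected here.)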
*/ + if (vec_len (rw_data) >= max_data_bytes) + goto done; + + { + u32 tmp; + + if (unformat (input, "mtu %d", &tmp) + && tmp < (1 << BITS (rw->max_l3_packet_bytes))) + max_packet_bytes = tmp; + } + + error = 0; + rw->sw_if_index = sw_if_index; + rw->max_l3_packet_bytes = max_packet_bytes; + rw->next_index = vlib_node_add_next (vm, rw->node_index, next_index); + vnet_rewrite_set_data_internal (rw, max_data_bytes, rw_data, vec_len (rw_data)); + + done: + vec_free (rw_data); + return error == 0; +} + +void vnet_rewrite_for_sw_interface (vnet_main_t * vnm, + vnet_l3_packet_type_t packet_type, + u32 sw_if_index, + u32 node_index, + void * dst_address, + vnet_rewrite_header_t * rw, + u32 max_rewrite_bytes) +{ + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + vnet_hw_interface_class_t * hc = vnet_get_hw_interface_class (vnm, hw->hw_class_index); + static u8 * rw_tmp = 0; + uword n_rw_tmp; + + rw->sw_if_index = sw_if_index; + rw->node_index = node_index; + rw->next_index = vlib_node_add_next (vnm->vlib_main, node_index, hw->output_node_index); + rw->max_l3_packet_bytes = hw->max_l3_packet_bytes[VLIB_TX]; + + ASSERT (max_rewrite_bytes > 0); + vec_reset_length (rw_tmp); + vec_validate (rw_tmp, max_rewrite_bytes - 1); + + ASSERT (hc->set_rewrite); + n_rw_tmp = hc->set_rewrite (vnm, sw_if_index, packet_type, dst_address, rw_tmp, max_rewrite_bytes); + + ASSERT (n_rw_tmp >= 0 && n_rw_tmp < max_rewrite_bytes); + vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rw_tmp, n_rw_tmp); +} + +void vnet_rewrite_for_tunnel (vnet_main_t * vnm, + u32 tx_sw_if_index, + u32 rewrite_node_index, + u32 post_rewrite_node_index, + vnet_rewrite_header_t * rw, + u8 *rewrite_data, + u32 rewrite_length) +{ + ip_adjacency_t * adj = 0; + /* + * Installed into vnet_buffer(b)->sw_if_index[VLIB_TX] e.g. + * by ip4_rewrite_inline. If the post-rewrite node injects into + * ipX-forward, this will be interpreted as a FIB number. + */ + rw->sw_if_index = tx_sw_if_index; + rw->node_index = rewrite_node_index; + rw->next_index = vlib_node_add_next (vnm->vlib_main, rewrite_node_index, + post_rewrite_node_index); + rw->max_l3_packet_bytes = (u16) ~0; /* we can't know at this point */ + + ASSERT (rewrite_length < sizeof (adj->rewrite_data)); + /* Leave room for ethernet + VLAN tag */ + vnet_rewrite_set_data_internal (rw, sizeof(adj->rewrite_data), + rewrite_data, rewrite_length); +} + +void serialize_vnet_rewrite (serialize_main_t * m, va_list * va) +{ + vnet_rewrite_header_t * rw = va_arg (*va, vnet_rewrite_header_t *); + u32 max_data_bytes = va_arg (*va, u32); + u8 * p; + + serialize_integer (m, rw->sw_if_index, sizeof (rw->sw_if_index)); + serialize_integer (m, rw->data_bytes, sizeof (rw->data_bytes)); + serialize_integer (m, rw->max_l3_packet_bytes, sizeof (rw->max_l3_packet_bytes)); + p = serialize_get (m, rw->data_bytes); + memcpy (p, vnet_rewrite_get_data_internal (rw, max_data_bytes), rw->data_bytes); +} + +void unserialize_vnet_rewrite (serialize_main_t * m, va_list * va) +{ + vnet_rewrite_header_t * rw = va_arg (*va, vnet_rewrite_header_t *); + u32 max_data_bytes = va_arg (*va, u32); + u8 * p; + + /* It is up to user to fill these in. 
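+
+     (node_index and next_index are graph indices local to the running
+     process and are not serialized above; the receiver is expected to
+     re-resolve them, e.g. with vlib_node_add_next as done elsewhere in
+     this file.)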
*/ + rw->node_index = ~0; + rw->next_index = ~0; + + unserialize_integer (m, &rw->sw_if_index, sizeof (rw->sw_if_index)); + unserialize_integer (m, &rw->data_bytes, sizeof (rw->data_bytes)); + unserialize_integer (m, &rw->max_l3_packet_bytes, sizeof (rw->max_l3_packet_bytes)); + p = unserialize_get (m, rw->data_bytes); + memcpy (vnet_rewrite_get_data_internal (rw, max_data_bytes), p, rw->data_bytes); +} diff --git a/vnet/vnet/rewrite.h b/vnet/vnet/rewrite.h new file mode 100644 index 00000000000..2e5b7f43948 --- /dev/null +++ b/vnet/vnet/rewrite.h @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * rewrite.h: packet rewrite + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_rewrite_h +#define included_vnet_rewrite_h + +#include <vlib/vlib.h> +#include <vnet/l3_types.h> + +/* Consider using vector types for speed? */ +typedef uword vnet_rewrite_data_t; + +typedef CLIB_PACKED (struct { + /* Interface to mark re-written packets with. */ + u32 sw_if_index; + + /* Packet processing node where rewrite happens. */ + u32 node_index; + + /* Next node to feed after packet rewrite is done. */ + u16 next_index; + + /* Number of bytes in rewrite data. */ + u16 data_bytes; + + /* Max packet size layer 3 (MTU) for output interface. + Used for MTU check after packet rewrite. */ + u16 max_l3_packet_bytes; + + /* Rewrite string starting at end and going backwards. */ + u8 data[0]; +}) vnet_rewrite_header_t; + +/* + Helper macro for declaring rewrite string w/ given max-size. + + Typical usage: + typedef struct { + // User data. + int a, b; + + // Total adjacency is 64 bytes. 
+ vnet_rewrite_declare(64 - 2*sizeof(int)) rw; + } my_adjacency_t; +*/ +#define vnet_declare_rewrite(total_bytes) \ +struct { \ + vnet_rewrite_header_t rewrite_header; \ + \ + u8 rewrite_data[(total_bytes) - sizeof (vnet_rewrite_header_t)]; \ +} + +always_inline void +vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw, + int max_size, + void * data, + int data_bytes) +{ + /* Sanity check values carefully for this memset operation*/ + ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE)); + ASSERT ((data_bytes >= 0) && (data_bytes < max_size)); + + rw->data_bytes = data_bytes; + memcpy (rw->data + max_size - data_bytes, data, data_bytes); + memset (rw->data, 0xfe, max_size - data_bytes); +} + +#define vnet_rewrite_set_data(rw,data,data_bytes) \ + vnet_rewrite_set_data_internal (&((rw).rewrite_header), \ + sizeof ((rw).rewrite_data), \ + (data), \ + (data_bytes)) + +always_inline void * +vnet_rewrite_get_data_internal (vnet_rewrite_header_t * rw, int max_size) +{ + ASSERT (rw->data_bytes <= max_size); + return rw->data + max_size - rw->data_bytes; +} + +#define vnet_rewrite_get_data(rw) \ + vnet_rewrite_get_data_internal (&((rw).rewrite_header), sizeof ((rw).rewrite_data)) + +always_inline void +vnet_rewrite_copy_one (vnet_rewrite_data_t * p0, vnet_rewrite_data_t * rw0, int i) +{ + p0[-i] = rw0[-i]; +} + +void vnet_rewrite_copy_slow_path (vnet_rewrite_data_t * p0, + vnet_rewrite_data_t * rw0, + word n_left, + uword most_likely_size); + +typedef CLIB_PACKED (struct { + u64 a; + u32 b; + u16 c; +}) eh_copy_t; + +always_inline void +_vnet_rewrite_one_header (vnet_rewrite_header_t * h0, + void * packet0, + int max_size, + int most_likely_size) +{ + vnet_rewrite_data_t * p0 = packet0; + vnet_rewrite_data_t * rw0 = (vnet_rewrite_data_t *) (h0->data + max_size); + word n_left0; + + /* 0xfefe => poisoned adjacency => crash */ + ASSERT (h0->data_bytes != 0xfefe); + + if (PREDICT_TRUE (h0->data_bytes == sizeof (eh_copy_t))) + { + eh_copy_t * s, * d; + s = (eh_copy_t *)(h0->data + max_size - sizeof (eh_copy_t)); + d = (eh_copy_t *)(((u8 *)packet0) - sizeof (eh_copy_t)); + __builtin_memcpy (d, s, sizeof (eh_copy_t)); + return; + } + + +#define _(i) \ + do { \ + if (most_likely_size > ((i)-1)*sizeof (vnet_rewrite_data_t)) \ + vnet_rewrite_copy_one (p0, rw0, (i)); \ + } while (0) + + _ (4); + _ (3); + _ (2); + _ (1); + +#undef _ + + n_left0 = (int) + (((int) h0->data_bytes - most_likely_size) + (sizeof(rw0[0])-1)) + / (int) sizeof (rw0[0]); + if (PREDICT_FALSE (n_left0 > 0)) + vnet_rewrite_copy_slow_path (p0, rw0, n_left0, most_likely_size); +} + +always_inline void +_vnet_rewrite_two_headers (vnet_rewrite_header_t * h0, + vnet_rewrite_header_t * h1, + void * packet0, + void * packet1, + int max_size, + int most_likely_size) +{ + vnet_rewrite_data_t * p0 = packet0; + vnet_rewrite_data_t * p1 = packet1; + vnet_rewrite_data_t * rw0 = (vnet_rewrite_data_t *) (h0->data + max_size); + vnet_rewrite_data_t * rw1 = (vnet_rewrite_data_t *) (h1->data + max_size); + word n_left0, n_left1; + int slow_path; + + /* 0xfefe => poisoned adjacency => crash */ + ASSERT (h0->data_bytes != 0xfefe); + ASSERT (h1->data_bytes != 0xfefe); + + /* Arithmetic calculation: bytes0 == bytes1 == 14 */ + slow_path = h0->data_bytes ^ h1->data_bytes; + slow_path += h0->data_bytes ^ sizeof (eh_copy_t); + + if (PREDICT_TRUE (slow_path == 0)) + { + eh_copy_t * s0, * d0, * s1, * d1; + s0 = (eh_copy_t *)(h0->data + max_size - sizeof (eh_copy_t)); + d0 = (eh_copy_t *)(((u8 *)packet0) - sizeof (eh_copy_t)); + __builtin_memcpy 
(d0, s0, sizeof (eh_copy_t)); + s1 = (eh_copy_t *)(h1->data + max_size - sizeof (eh_copy_t)); + d1 = (eh_copy_t *)(((u8 *)packet1) - sizeof (eh_copy_t)); + __builtin_memcpy (d1, s1, sizeof (eh_copy_t)); + return; + } + +#define _(i) \ + do { \ + if (most_likely_size > ((i)-1)*sizeof (vnet_rewrite_data_t)) \ + { \ + vnet_rewrite_copy_one (p0, rw0, (i)); \ + vnet_rewrite_copy_one (p1, rw1, (i)); \ + } \ + } while (0) + + _ (4); + _ (3); + _ (2); + _ (1); + +#undef _ + + n_left0 = (int) + (((int) h0->data_bytes - most_likely_size) + (sizeof(rw0[0])-1)) + / (int) sizeof (rw0[0]); + n_left1 = (int) + (((int) h1->data_bytes - most_likely_size) + (sizeof(rw1[0])-1)) + / (int) sizeof (rw1[0]); + + if (PREDICT_FALSE (n_left0 > 0 || n_left1 > 0)) + { + vnet_rewrite_copy_slow_path (p0, rw0, n_left0, most_likely_size); + vnet_rewrite_copy_slow_path (p1, rw1, n_left1, most_likely_size); + } +} + +#define vnet_rewrite_one_header(rw0,p0,most_likely_size) \ + _vnet_rewrite_one_header (&((rw0).rewrite_header), (p0), \ + sizeof ((rw0).rewrite_data), \ + (most_likely_size)) + +#define vnet_rewrite_two_headers(rw0,rw1,p0,p1,most_likely_size) \ + _vnet_rewrite_two_headers (&((rw0).rewrite_header), &((rw1).rewrite_header), \ + (p0), (p1), \ + sizeof ((rw0).rewrite_data), \ + (most_likely_size)) + +#define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0) +void vnet_rewrite_for_sw_interface (struct vnet_main_t * vnm, + vnet_l3_packet_type_t packet_type, + u32 sw_if_index, + u32 node_index, + void * dst_address, + vnet_rewrite_header_t * rw, + u32 max_rewrite_bytes); + +void vnet_rewrite_for_tunnel (struct vnet_main_t * vnm, + u32 tx_sw_if_index, + u32 rewrite_node_index, + u32 post_rewrite_node_index, + vnet_rewrite_header_t * rw, + u8 *rewrite_data, + u32 rewrite_length); + +/* Parser for unformat header & rewrite string. */ +unformat_function_t unformat_vnet_rewrite; + +format_function_t format_vnet_rewrite; +format_function_t format_vnet_rewrite_header; + +serialize_function_t serialize_vnet_rewrite, unserialize_vnet_rewrite; + +#endif /* included_vnet_rewrite_h */ diff --git a/vnet/vnet/snap/node.c b/vnet/vnet/snap/node.c new file mode 100644 index 00000000000..83d373a22de --- /dev/null +++ b/vnet/vnet/snap/node.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * snap_node.c: snap packet processing + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/llc/llc.h> +#include <vnet/snap/snap.h> + +typedef enum { + SNAP_INPUT_NEXT_DROP, + SNAP_INPUT_NEXT_PUNT, + SNAP_INPUT_NEXT_ETHERNET_TYPE, + SNAP_INPUT_N_NEXT, +} snap_input_next_t; + +typedef struct { + u8 packet_data[32]; +} snap_input_trace_t; + +static u8 * format_snap_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + snap_input_trace_t * t = va_arg (*va, snap_input_trace_t *); + + s = format (s, "%U", format_snap_header, t->packet_data); + + return s; +} + +static uword +snap_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + snap_main_t * sm = &snap_main; + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (snap_input_trace_t)); + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + snap_header_t * h0, * h1; + snap_protocol_info_t * pi0, * pi1; + u8 next0, next1, is_ethernet0, is_ethernet1, len0, len1, enqueue_code; + u32 oui0, oui1; + + /* Prefetch next iteration. 
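Fetch the buffer headers and the start of the SNAP headers for the next two packets so they are already in cache on the next pass through the dual loop.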
*/ + { + vlib_buffer_t * b2, * b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, LOAD); + vlib_prefetch_buffer_header (b3, LOAD); + + CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = (void *) (b0->data + b0->current_data); + h1 = (void *) (b1->data + b1->current_data); + + oui0 = snap_header_get_oui (h0); + oui1 = snap_header_get_oui (h1); + + is_ethernet0 = oui0 == IEEE_OUI_ethernet; + is_ethernet1 = oui1 == IEEE_OUI_ethernet; + + len0 = sizeof (h0[0]) - (is_ethernet0 ? sizeof (h0->protocol) : 0); + len1 = sizeof (h1[0]) - (is_ethernet1 ? sizeof (h1->protocol) : 0); + + b0->current_data += len0; + b1->current_data += len1; + + b0->current_length -= len0; + b1->current_length -= len1; + + pi0 = snap_get_protocol_info (sm, h0); + pi1 = snap_get_protocol_info (sm, h1); + + next0 = pi0 ? pi0->next_index : SNAP_INPUT_NEXT_DROP; + next1 = pi1 ? pi1->next_index : SNAP_INPUT_NEXT_DROP; + + next0 = is_ethernet0 ? SNAP_INPUT_NEXT_ETHERNET_TYPE : next0; + next1 = is_ethernet1 ? SNAP_INPUT_NEXT_ETHERNET_TYPE : next1; + + /* In case of error. */ + b0->error = node->errors[SNAP_ERROR_UNKNOWN_PROTOCOL]; + b1->error = node->errors[SNAP_ERROR_UNKNOWN_PROTOCOL]; + + enqueue_code = (next0 != next_index) + 2*(next1 != next_index); + + if (PREDICT_FALSE (enqueue_code != 0)) + { + switch (enqueue_code) + { + case 1: + /* A B A */ + to_next[-2] = bi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, bi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, bi1); + break; + + case 3: + /* A B B or A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, bi0); + vlib_set_next_frame_buffer (vm, node, next1, bi1); + if (next0 == next1) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + next_index = next1; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + snap_header_t * h0; + snap_protocol_info_t * pi0; + u8 next0, is_ethernet0, len0; + u32 oui0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = (void *) (b0->data + b0->current_data); + + oui0 = snap_header_get_oui (h0); + + is_ethernet0 = oui0 == IEEE_OUI_ethernet; + + len0 = sizeof (h0[0]) - (is_ethernet0 ? sizeof (h0->protocol) : 0); + + b0->current_data += len0; + + b0->current_length -= len0; + + pi0 = snap_get_protocol_info (sm, h0); + + next0 = pi0 ? pi0->next_index : SNAP_INPUT_NEXT_DROP; + + next0 = is_ethernet0 ? SNAP_INPUT_NEXT_ETHERNET_TYPE : next0; + + /* In case of error. */ + b0->error = node->errors[SNAP_ERROR_UNKNOWN_PROTOCOL]; + + /* Sent packet to wrong next? */ + if (PREDICT_FALSE (next0 != next_index)) + { + /* Return old frame; remove incorrectly enqueued packet. */ + vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); + + /* Send to correct next. 
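Switch the current next frame over to next0 and re-enqueue this buffer on the new frame.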
*/ + next_index = next0; + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static char * snap_error_strings[] = { +#define _(f,s) s, + foreach_snap_error +#undef _ +}; + +VLIB_REGISTER_NODE (snap_input_node) = { + .function = snap_input, + .name = "snap-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = SNAP_N_ERROR, + .error_strings = snap_error_strings, + + .n_next_nodes = SNAP_INPUT_N_NEXT, + .next_nodes = { + [SNAP_INPUT_NEXT_DROP] = "error-drop", + [SNAP_INPUT_NEXT_PUNT] = "error-punt", + [SNAP_INPUT_NEXT_ETHERNET_TYPE] = "ethernet-input-type", + }, + + .format_buffer = format_snap_header_with_length, + .format_trace = format_snap_input_trace, + .unformat_buffer = unformat_snap_header, +}; + +static clib_error_t * snap_input_init (vlib_main_t * vm) +{ + { + clib_error_t * error = vlib_call_init_function (vm, snap_init); + if (error) + clib_error_report (error); + } + + snap_setup_node (vm, snap_input_node.index); + + llc_register_input_protocol (vm, LLC_PROTOCOL_snap, snap_input_node.index); + + return 0; +} + +VLIB_INIT_FUNCTION (snap_input_init); + +void +snap_register_input_protocol (vlib_main_t * vm, + char * name, + u32 ieee_oui, + u16 protocol, + u32 node_index) +{ + snap_main_t * sm = &snap_main; + snap_protocol_info_t * pi; + snap_header_t h; + snap_oui_and_protocol_t key; + + { + clib_error_t * error = vlib_call_init_function (vm, snap_input_init); + if (error) + clib_error_report (error); + } + + h.protocol = clib_host_to_net_u16 (protocol); + h.oui[0] = (ieee_oui >> 16) & 0xff; + h.oui[1] = (ieee_oui >> 8) & 0xff; + h.oui[2] = (ieee_oui >> 0) & 0xff; + pi = snap_get_protocol_info (sm, &h); + if (pi) + return; + + vec_add2 (sm->protocols, pi, 1); + + pi->name = format (0, "%s", name); + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + snap_input_node.index, + node_index); + + key.oui = ieee_oui; + key.protocol = clib_host_to_net_u16 (protocol); + + mhash_set (&sm->protocol_hash, &key, pi - sm->protocols, /* old_value */ 0); + hash_set_mem (sm->protocol_info_by_name, name, pi - sm->protocols); +} diff --git a/vnet/vnet/snap/pg.c b/vnet/vnet/snap/pg.c new file mode 100644 index 00000000000..74c363fcc69 --- /dev/null +++ b/vnet/vnet/snap/pg.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * snap_pg.c: packet generator snap interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/snap/snap.h> + +typedef struct { + pg_edit_t oui; + pg_edit_t protocol; +} pg_snap_header_t; + +static inline void +pg_snap_header_init (pg_snap_header_t * e) +{ + pg_edit_init (&e->oui, snap_header_t, oui); + pg_edit_init (&e->protocol, snap_header_t, protocol); +} + +uword +unformat_pg_snap_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_snap_header_t * h; + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (snap_header_t), + &group_index); + pg_snap_header_init (h); + + error = 1; + if (! unformat (input, "%U -> %U", + unformat_pg_edit, + unformat_snap_protocol, &h->oui, &h->protocol)) + goto done; + + { + snap_main_t * pm = &snap_main; + snap_protocol_info_t * pi = 0; + pg_node_t * pg_node = 0; + + if (h->oui.type == PG_EDIT_FIXED + && h->protocol.type == PG_EDIT_FIXED) + { + u8 * o = h->oui.values[PG_EDIT_LO]; + u8 * p = h->protocol.values[PG_EDIT_LO]; + snap_header_t h; + + h.oui[0] = o[0]; + h.oui[1] = o[1]; + h.oui[2] = o[2]; + h.protocol = *(u16 *) p; + pi = snap_get_protocol_info (pm, &h); + if (pi && pi->node_index != ~0) + pg_node = pg_get_node (pi->node_index); + } + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto done; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/snap/snap.c b/vnet/vnet/snap/snap.c new file mode 100644 index 00000000000..df15f8614c6 --- /dev/null +++ b/vnet/vnet/snap/snap.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * snap.c: snap support + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/snap/snap.h> +#include <vnet/ethernet/ethernet.h> + +/* Global main structure. */ +snap_main_t snap_main; + +static u8 * format_cisco_snap_protocol (u8 * s, va_list * args) +{ + snap_header_t * h = va_arg (*args, snap_header_t *); + u16 protocol = clib_net_to_host_u16 (h->protocol); + char * t = 0; + switch (protocol) + { +#define _(n,f) case n: t = #f; break; + foreach_snap_cisco_protocol; +#undef _ + default: break; + } + if (t) + return format (s, "%s", t); + else + return format (s, "unknown 0x%x", protocol); +} + +u8 * format_snap_protocol (u8 * s, va_list * args) +{ + snap_header_t * h = va_arg (*args, snap_header_t *); + u32 oui = snap_header_get_oui (h); + u16 protocol = clib_net_to_host_u16 (h->protocol); + + switch (oui) + { + case IEEE_OUI_ethernet: + return format (s, "ethernet %U", format_ethernet_type, h->protocol); + + case IEEE_OUI_cisco: + return format (s, "cisco %U", format_cisco_snap_protocol, h); + + default: + return format (s, "oui 0x%06x 0x%04x", oui, protocol); + } +} + +u8 * format_snap_header_with_length (u8 * s, va_list * args) +{ + snap_main_t * sm = &snap_main; + snap_header_t * h = va_arg (*args, snap_header_t *); + snap_protocol_info_t * pi = snap_get_protocol_info (sm, h); + u32 max_header_bytes = va_arg (*args, u32); + uword indent, header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "snap header truncated"); + + indent = format_get_indent (s); + + s = format (s, "SNAP %U", format_snap_protocol, h); + + if (max_header_bytes != 0 && header_bytes > max_header_bytes && pi != 0) + { + vlib_node_t * node = vlib_get_node (sm->vlib_main, pi->node_index); + if (node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) (h + 1), + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * format_snap_header (u8 * s, va_list * args) +{ + snap_header_t * h = va_arg (*args, snap_header_t *); + return format (s, "%U", format_snap_header_with_length, h, 0); +} + +/* Returns snap protocol as an int in host byte order. 
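Accepts either a numeric "0xOUI 0xPROTOCOL" pair or a registered protocol name, and writes the parsed oui/protocol into the supplied snap header.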
*/ +uword +unformat_snap_protocol (unformat_input_t * input, va_list * args) +{ + snap_header_t * result = va_arg (*args, snap_header_t *); + snap_main_t * sm = &snap_main; + snap_oui_and_protocol_t p; + u32 i; + + /* Numeric type. */ + if (unformat (input, "0x%x 0x%x", &p.oui, &p.protocol)) + { + if (p.oui >= (1 << 24)) + return 0; + if (p.protocol >= (1 << 16)) + return 0; + } + + /* Named type. */ + else if (unformat_user (input, unformat_vlib_number_by_name, + sm->protocol_info_by_name, &i)) + { + snap_protocol_info_t * pi = vec_elt_at_index (sm->protocols, i); + p = pi->oui_and_protocol; + } + + else + return 0; + + snap_header_set_protocol (result, &p); + return 1; +} + +uword +unformat_snap_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + snap_header_t _h, * h = &_h; + + if (! unformat (input, "%U", unformat_snap_protocol, h)) + return 0; + + /* Add header to result. */ + { + void * p; + u32 n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, n_bytes); + memcpy (p, h, n_bytes); + } + + return 1; +} + +static clib_error_t * snap_init (vlib_main_t * vm) +{ + snap_main_t * sm = &snap_main; + + memset (sm, 0, sizeof (sm[0])); + sm->vlib_main = vm; + + mhash_init (&sm->protocol_hash, sizeof (uword), sizeof (snap_oui_and_protocol_t)); + + sm->protocol_info_by_name + = hash_create_string (/* elts */ 0, sizeof (uword)); + + return vlib_call_init_function (vm, snap_input_init); +} + +VLIB_INIT_FUNCTION (snap_init); + diff --git a/vnet/vnet/snap/snap.h b/vnet/vnet/snap/snap.h new file mode 100644 index 00000000000..ed4a10e207e --- /dev/null +++ b/vnet/vnet/snap/snap.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * snap.h: SNAP definitions + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_snap_h +#define included_snap_h + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> + +#define foreach_ieee_oui \ + _ (0x000000, ethernet) \ + _ (0x00000c, cisco) + +typedef enum { +#define _(n,f) IEEE_OUI_##f = n, + foreach_ieee_oui +#undef _ +} ieee_oui_t; + +#define foreach_snap_cisco_protocol \ + _ (0x0102, drip) \ + _ (0x0104, port_aggregation_protocol) \ + _ (0x0105, mls_hello) \ + _ (0x010b, per_vlan_spanning_tree) \ + _ (0x010c, vlan_bridge) \ + _ (0x0111, unidirectional_link_detection) \ + _ (0x2000, cdp) \ + _ (0x2001, cgmp) \ + _ (0x2003, vtp) \ + _ (0x2004, dtp) \ + _ (0x200a, stp_uplink_fast) + +typedef enum { +#define _(n,f) SNAP_cisco_##f = n, + foreach_snap_cisco_protocol +#undef _ +} snap_cisco_protocol_t; + +typedef union { + CLIB_PACKED (struct { + /* OUI: organization unique identifier. */ + u8 oui[3]; + + /* Per-OUI protocol. */ + u16 protocol; + }); + + u8 as_u8[5]; +} snap_header_t; + +typedef struct { + u32 oui; + u32 protocol; +} snap_oui_and_protocol_t; + +typedef struct { + /* Name vector string. */ + u8 * name; + + snap_oui_and_protocol_t oui_and_protocol; + + /* Node which handles this type. */ + u32 node_index; + + /* snap-input next index for this type. */ + u32 next_index; +} snap_protocol_info_t; + +always_inline void +snap_header_set_protocol (snap_header_t * h, snap_oui_and_protocol_t * p) +{ + u16 protocol = p->protocol; + u32 oui = p->oui; + h->protocol = clib_host_to_net_u16 (protocol); + h->oui[0] = (oui >> 16) & 0xff; + h->oui[1] = (oui >> 8) & 0xff; + h->oui[2] = (oui >> 0) & 0xff; +} + +#define foreach_snap_error \ + _ (NONE, "no error") \ + _ (UNKNOWN_PROTOCOL, "unknown oui/snap protocol") + +typedef enum { +#define _(f,s) SNAP_ERROR_##f, + foreach_snap_error +#undef _ + SNAP_N_ERROR, +} snap_error_t; + +typedef struct { + vlib_main_t * vlib_main; + + /* Vector of known SNAP oui/protocol pairs. */ + snap_protocol_info_t * protocols; + + /* Hash table mapping oui/protocol to protocol index. */ + mhash_t protocol_hash; + + /* Hash table mapping protocol by name. */ + uword * protocol_info_by_name; +} snap_main_t; + +always_inline u32 +snap_header_get_oui (snap_header_t * h) +{ + return (h->oui[0] << 16) | (h->oui[1] << 8) | h->oui[2]; +} + +always_inline snap_protocol_info_t * +snap_get_protocol_info (snap_main_t * sm, snap_header_t * h) +{ + snap_oui_and_protocol_t key; + uword * p; + + key.oui = snap_header_get_oui (h); + key.protocol = h->protocol; + + p = mhash_get (&sm->protocol_hash, &key); + return p ? vec_elt_at_index (sm->protocols, p[0]) : 0; +} + +snap_main_t snap_main; + +/* Register given node index to take input for given snap type. */ +void +snap_register_input_protocol (vlib_main_t * vm, + char * name, + u32 ieee_oui, + u16 protocol, + u32 node_index); + +void snap_set_adjacency (vnet_rewrite_header_t * rw, + uword max_data_bytes, + u32 ieee_oui, + u16 protocol); + +format_function_t format_snap_protocol; +format_function_t format_snap_header; +format_function_t format_snap_header_with_length; + +/* Parse snap protocol as 0xXXXX or protocol name. */ +unformat_function_t unformat_snap_protocol; + +/* Parse snap header. 
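unformat_snap_header appends the parsed header bytes to the caller's u8 vector; unformat_pg_snap_header builds packet-generator edits for a stream.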
*/ +unformat_function_t unformat_snap_header; +unformat_function_t unformat_pg_snap_header; + +always_inline void +snap_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + + n->format_buffer = format_snap_header_with_length; + n->unformat_buffer = unformat_snap_header; + pn->unformat_edit = unformat_pg_snap_header; +} + +#endif /* included_snap_h */ diff --git a/vnet/vnet/sr/rfc_draft_05.txt b/vnet/vnet/sr/rfc_draft_05.txt new file mode 100644 index 00000000000..bc41c181ea4 --- /dev/null +++ b/vnet/vnet/sr/rfc_draft_05.txt @@ -0,0 +1,1265 @@ +Network Working Group S. Previdi, Ed. +Internet-Draft C. Filsfils +Intended status: Standards Track Cisco Systems, Inc. +Expires: June 12, 2015 B. Field + Comcast + I. Leung + Rogers Communications + December 9, 2014 + + + IPv6 Segment Routing Header (SRH) + draft-previdi-6man-segment-routing-header-05 + +Abstract + + Segment Routing (SR) allows a node to steer a packet through a + controlled set of instructions, called segments, by prepending a SR + header to the packet. A segment can represent any instruction, + topological or service-based. SR allows to enforce a flow through + any path (topological, or application/service based) while + maintaining per-flow state only at the ingress node to the SR domain. + + Segment Routing can be applied to the IPv6 data plane with the + addition of a new type of Routing Extension Header. This draft + describes the Segment Routing Extension Header Type and how it is + used by SR capable nodes. + +Requirements Language + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +Status of This Memo + + This Internet-Draft is submitted in full conformance with the + provisions of BCP 78 and BCP 79. + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF). Note that other groups may also distribute + working documents as Internet-Drafts. The list of current Internet- + Drafts is at http://datatracker.ietf.org/drafts/current/. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as "work in progress." + + + + +Previdi, et al. Expires June 12, 2015 [Page 1] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + This Internet-Draft will expire on June 12, 2015. + +Copyright Notice + + Copyright (c) 2014 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + +Table of Contents + + 1. Structure of this document . . . . . . . . . . . . . . . . . 3 + 2. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3 + 3. Introduction . . . . . . . . . . . . . . 
. . . . . . . . . . 3 + 3.1. Data Planes supporting Segment Routing . . . . . . . . . 4 + 3.2. Illustration . . . . . . . . . . . . . . . . . . . . . . 4 + 4. Abstract Routing Model . . . . . . . . . . . . . . . . . . . 7 + 4.1. Segment Routing Global Block (SRGB) . . . . . . . . . . . 8 + 4.2. Traffic Engineering with SR . . . . . . . . . . . . . . . 9 + 4.3. Segment Routing Database . . . . . . . . . . . . . . . . 10 + 5. IPv6 Instantiation of Segment Routing . . . . . . . . . . . . 10 + 5.1. Segment Identifiers (SIDs) and SRGB . . . . . . . . . . . 10 + 5.1.1. Node-SID . . . . . . . . . . . . . . . . . . . . . . 11 + 5.1.2. Adjacency-SID . . . . . . . . . . . . . . . . . . . . 11 + 5.2. Segment Routing Extension Header (SRH) . . . . . . . . . 11 + 5.2.1. SRH and RFC2460 behavior . . . . . . . . . . . . . . 15 + 6. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 15 + 6.1. Segment Routing Operations . . . . . . . . . . . . . . . 15 + 6.2. Segment Routing Node Functions . . . . . . . . . . . . . 16 + 6.2.1. Ingress SR Node . . . . . . . . . . . . . . . . . . . 16 + 6.2.2. Transit Non-SR Capable Node . . . . . . . . . . . . . 18 + 6.2.3. SR Intra Segment Transit Node . . . . . . . . . . . . 18 + 6.2.4. SR Segment Endpoint Node . . . . . . . . . . . . . . 18 + 6.3. FRR Flag Settings . . . . . . . . . . . . . . . . . . . . 18 + 7. SR and Tunneling . . . . . . . . . . . . . . . . . . . . . . 18 + 8. Example Use Case . . . . . . . . . . . . . . . . . . . . . . 19 + 9. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 21 + 10. Manageability Considerations . . . . . . . . . . . . . . . . 21 + 11. Security Considerations . . . . . . . . . . . . . . . . . . . 21 + 12. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 21 + + + +Previdi, et al. Expires June 12, 2015 [Page 2] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + 13. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 21 + 14. References . . . . . . . . . . . . . . . . . . . . . . . . . 21 + 14.1. Normative References . . . . . . . . . . . . . . . . . . 21 + 14.2. Informative References . . . . . . . . . . . . . . . . . 21 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 22 + +1. Structure of this document + + Section 3 gives an introduction on SR for IPv6 networks. + + Section 4 describes the Segment Routing abstract model. + + Section 5 defines the Segment Routing Header (SRH) allowing + instantiation of SR over IPv6 dataplane. + + Section 6 details the procedures of the Segment Routing Header. + +2. Segment Routing Documents + + Segment Routing terminology is defined in + [I-D.filsfils-spring-segment-routing]. + + Segment Routing use cases are described in + [I-D.filsfils-spring-segment-routing-use-cases]. + + Segment Routing IPv6 use cases are described in + [I-D.ietf-spring-ipv6-use-cases]. + + Segment Routing protocol extensions are defined in + [I-D.ietf-isis-segment-routing-extensions], and + [I-D.psenak-ospf-segment-routing-ospfv3-extension]. + + The security mechanisms of the Segment Routing Header (SRH) are + described in [I-D.vyncke-6man-segment-routing-security]. + +3. Introduction + + Segment Routing (SR), defined in + [I-D.filsfils-spring-segment-routing], allows a node to steer a + packet through a controlled set of instructions, called segments, by + prepending a SR header to the packet. A segment can represent any + instruction, topological or service-based. 
SR allows to enforce a + flow through any path (topological or service/application based) + while maintaining per-flow state only at the ingress node to the SR + domain. Segments can be derived from different components: IGP, BGP, + Services, Contexts, Locators, etc. The list of segment forming the + path is called the Segment List and is encoded in the packet header. + + + + +Previdi, et al. Expires June 12, 2015 [Page 3] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + SR allows the use of strict and loose source based routing paradigms + without requiring any additional signaling protocols in the + infrastructure hence delivering an excellent scalability property. + + The source based routing model described in + [I-D.filsfils-spring-segment-routing] is inherited from the ones + proposed by [RFC1940] and [RFC2460]. The source based routing model + offers the support for explicit routing capability. + +3.1. Data Planes supporting Segment Routing + + Segment Routing (SR), can be instantiated over MPLS + ([I-D.filsfils-spring-segment-routing-mpls]) and IPv6. This document + defines its instantiation over the IPv6 data-plane based on the use- + cases defined in [I-D.ietf-spring-ipv6-use-cases]. + + Segment Routing for IPv6 (SR-IPv6) is required in networks where MPLS + data-plane is not used or, when combined with SR-MPLS, in networks + where MPLS is used in the core and IPv6 is used at the edge (home + networks, datacenters). + + This document defines a new type of Routing Header (originally + defined in [RFC2460]) called the Segment Routing Header (SRH) in + order to convey the Segment List in the packet header as defined in + [I-D.filsfils-spring-segment-routing]. Mechanisms through which + segment are known and advertised are outside the scope of this + document. + +3.2. Illustration + + In the context of Figure 1 where all the links have the same IGP + cost, let us assume that a packet P enters the SR domain at an + ingress edge router I and that the operator requests the following + requirements for packet P: + + The local service S offered by node B must be applied to packet P. + + The links AB and CE cannot be used to transport the packet P. + + Any node N along the journey of the packet should be able to + determine where the packet P entered the SR domain and where it + will exit. The intermediate node should be able to determine the + paths from the ingress edge router to itself, and from itself to + the egress edge router. + + Per-flow State for packet P should only be created at the ingress + edge router. + + + + +Previdi, et al. Expires June 12, 2015 [Page 4] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + The operator can forbid, for security reasons, anyone outside the + operator domain to exploit its intra-domain SR capabilities. + + I---A---B---C---E + \ | / \ / + \ | / F + \|/ + D + + Figure 1: An illustration of SR properties + + All these properties may be realized by instructing the ingress SR + edge router I to push the following abstract SR header on the packet + P. 
+ + +---------------------------------------------------------------+ + | | | + | Abstract SR Header | | + | | | + | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported | + | ^ | | Packet | + | | | | P | + | +---------------------+ | | + | | | + +---------------------------------------------------------------+ + + Figure 2: Packet P at node I + + The abstract SR header contains a source route encoded as a list of + segments {SD, SB, SS, SF, SE}, a pointer (Ptr) and the identification + of the ingress and egress SR edge routers (segments SI and SE). + + A segment identifies a topological instruction or a service + instruction. A segment can either be global or local. The + instruction associated with a global segment is recognized and + executed by any SR-capable node in the domain. The instruction + associated with a local segment is only supported by the specific + node that originates it. + + Let us assume some IGP (i.e.: ISIS and OSPF) extensions to define a + "Node Segment" as a global instruction within the IGP domain to + forward a packet along the shortest path to the specified node. Let + us further assume that within the SR domain illustrated in Figure 1, + segments SI, SD, SB, SE and SF respectively identify IGP node + segments to I, D, B, E and F. + + Let us assume that node B identifies its local service S with local + segment SS. + + + +Previdi, et al. Expires June 12, 2015 [Page 5] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + With all of this in mind, let us describe the journey of the packet + P. + + The packet P reaches the ingress SR edge router. I pushes the SR + header illustrated in Figure 2 and sets the pointer to the first + segment of the list (SD). + + SD is an instruction recognized by all the nodes in the SR domain + which causes the packet to be forwarded along the shortest path to D. + + Once at D, the pointer is incremented and the next segment is + executed (SB). + + SB is an instruction recognized by all the nodes in the SR domain + which causes the packet to be forwarded along the shortest path to B. + + Once at B, the pointer is incremented and the next segment is + executed (SS). + + SS is an instruction only recognized by node B which causes the + packet to receive service S. + + Once the service applied, the next segment is executed (SF) which + causes the packet to be forwarded along the shortest path to F. + + Once at F, the pointer is incremented and the next segment is + executed (SE). + + SE is an instruction recognized by all the nodes in the SR domain + which causes the packet to be forwarded along the shortest path to E. + + E then removes the SR header and the packet continues its journey + outside the SR domain. + + All of the requirements are met. + + First, the packet P has not used links AB and CE: the shortest-path + from I to D is I-A-D, the shortest-path from D to B is D-B, the + shortest-path from B to F is B-C-F and the shortest-path from F to E + is F-E, hence the packet path through the SR domain is I-A-D-B-C-F-E + and the links AB and CE have been avoided. + + Second, the service S supported by B has been applied on packet P. + + Third, any node along the packet path is able to identify the service + and topological journey of the packet within the SR domain. For + example, node C receives the packet illustrated in Figure 3 and hence + is able to infer where the packet entered the SR domain (SI), how it + + + +Previdi, et al. 
Expires June 12, 2015 [Page 6] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + got up to itself {SD, SB, SS, SE}, where it will exit the SR domain + (SE) and how it will do so {SF, SE}. + + +---------------------------------------------------------------+ + | | | + | SR Header | | + | | | + | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported | + | ^ | | Packet | + | | | | P | + | +--------+ | | + | | | + +---------------------------------------------------------------+ + + Figure 3: Packet P at node C + + Fourth, only node I maintains per-flow state for packet P. The + entire program of topological and service instructions to be executed + by the SR domain on packet P is encoded by the ingress edge router I + in the SR header in the form of a list of segments where each segment + identifies a specific instruction. No further per-flow state is + required along the packet path. The per-flow state is in the SR + header and travels with the packet. Intermediate nodes only hold + states related to the IGP global node segments and the local IGP + adjacency segments. These segments are not per-flow specific and + hence scale very well. Typically, an intermediate node would + maintain in the order of 100's to 1000's global node segments and in + the order of 10's to 100 of local adjacency segments. Typically the + SR IGP forwarding table is expected to be much less than 10000 + entries. + + Fifth, the SR header is inserted at the entrance to the domain and + removed at the exit of the operator domain. For security reasons, + the operator can forbid anyone outside its domain to use its intra- + domain SR capability. + +4. Abstract Routing Model + + At the entrance of the SR domain, the ingress SR edge router pushes + the SR header on top of the packet. At the exit of the SR domain, + the egress SR edge router removes the SR header. + + The abstract SR header contains an ordered list of segments, a + pointer identifying the next segment to process and the + identifications of the ingress and egress SR edge routers on the path + of this packet. The pointer identifies the segment that MUST be used + by the receiving router to process the packet. This segment is + called the active segment. + + + +Previdi, et al. Expires June 12, 2015 [Page 7] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + A property of SR is that the entire source route of the packet, + including the identity of the ingress and egress edge routers is + always available with the packet. This allows for interesting + accounting and service applications. + + We define three SR-header operations: + + "PUSH": an SR header is pushed on an IP packet, or additional + segments are added at the head of the segment list. The pointer + is moved to the first entry of the added segments. + + "NEXT": the active segment is completed, the pointer is moved to + the next segment in the list. + + "CONTINUE": the active segment is not completed, the pointer is + left unchanged. + + In the future, other SR-header management operations may be defined. + + As the packet travels through the SR domain, the pointer is + incremented through the ordered list of segments and the source route + encoded by the SR ingress edge node is executed. + + A node processes an incoming packet according to the instruction + associated with the active segment. + + Any instruction might be associated with a segment: for example, an + intra-domain topological strict or loose forwarding instruction, a + service instruction, etc. 
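   For illustration only (this sketch is not part of the draft or of the
   vnet code, and the names in it are hypothetical), the abstract header
   and the three operations can be modelled over a simple fixed-size
   segment list:

   /* Toy model of the abstract SR header; bounds checks omitted. */
   #include <stdint.h>

   #define SR_MAX_SEGMENTS 16

   typedef struct {
     uint32_t segments[SR_MAX_SEGMENTS]; /* ordered segment list */
     int n_segments;                     /* number of valid entries */
     int ptr;                            /* index of the active segment */
   } abstract_sr_header_t;

   /* PUSH: prepend segments; pointer moves to the first added entry. */
   static void
   sr_push (abstract_sr_header_t * h, const uint32_t * segs, int n)
   {
     int i;
     for (i = h->n_segments - 1; i >= 0; i--)
       h->segments[i + n] = h->segments[i];
     for (i = 0; i < n; i++)
       h->segments[i] = segs[i];
     h->n_segments += n;
     h->ptr = 0;
   }

   /* NEXT: the active segment is completed; advance the pointer. */
   static void sr_next (abstract_sr_header_t * h) { h->ptr++; }

   /* CONTINUE: the active segment is not completed; pointer unchanged. */
   static void sr_continue (abstract_sr_header_t * h) { (void) h; }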
+ + At minimum, a segment instruction must define two elements: the + identity of the next-hop to forward the packet to (this could be the + same node or a context within the node) and which SR-header + management operation to execute. + + Each segment is known in the network through a Segment Identifier + (SID). The terms "segment" and "SID" are interchangeable. + +4.1. Segment Routing Global Block (SRGB) + + In the SR abstract model, a segment is identified by a Segment + Routing Identifier (SID). The SR abstract model doesn't mandate a + specific format for the SID (IPv6 address or other formats). + + In Segment Routing IPv6 the SID is an IPv6 address. Therefore, the + SRGB is materialized by the global IPv6 address space which + represents the set of IPv6 routable addresses in the SR domain. The + following rules apply: + + + +Previdi, et al. Expires June 12, 2015 [Page 8] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + o Each node of the SR domain MUST be configured with the Segment + Routing Global Block (SRGB). + + o All global segments must be allocated from the SRGB. Any SR + capable node MUST be able to process any global segment advertised + by any other node within the SR domain. + + o Any segment outside the SRGB has a local significance and is + called a "local segment". An SR-capable node MUST be able to + process the local segments it originates. An SR-capable node MUST + NOT support the instruction associated with a local segment + originated by a remote node. + +4.2. Traffic Engineering with SR + + An SR Traffic Engineering policy is composed of two elements: a flow + classification and a segment-list to prepend on the packets of the + flow. + + In SR, this per-flow state only exists at the ingress edge node where + the policy is defined and the SR header is pushed. + + It is outside the scope of the document to define the process that + leads to the instantiation at a node N of an SR Traffic Engineering + policy. + + [I-D.filsfils-spring-segment-routing-use-cases] illustrates various + alternatives: + + N is deriving this policy automatically (e.g. FRR). + + N is provisioned explicitly by the operator. + + N is provisioned by a controller or server (e.g.: SDN Controller). + + N is provisioned by the operator with a high-level policy which is + mapped into a path thanks to a local CSPF-based computation (e.g. + affinity/SRLG exclusion). + + N could also be provisioned by other means. + + [I-D.filsfils-spring-segment-routing-use-cases] explains why the + majority of use-cases require very short segment-lists, hence + minimizing the performance impact, if any, of inserting and + transporting the segment list. + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 9] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + A SDN controller, which desires to instantiate at node N an SR + Traffic Engineering policy, collects the SR capability of node N such + as to ensure that the policy meets its capability. + +4.3. Segment Routing Database + + The Segment routing Database (SRDB) is a set of entries where each + entry is identified by a SID. The instruction associated with each + entry at least defines the identity of the next-hop to which the + packet should be forwarded and what operation should be performed on + the SR header (PUSH, CONTINUE, NEXT). 
+ + +---------+-----------+---------------------------------+ + | Segment | Next-Hop | SR Header operation | + +---------+-----------+---------------------------------+ + | Sk | M | CONTINUE | + | Sj | N | NEXT | + | Sl | NAT Srvc | NEXT | + | Sm | FW srvc | NEXT | + | Sn | Q | NEXT | + | etc. | etc. | etc. | + +---------+-----------+---------------------------------+ + + Figure 4: SR Database + + Each SR-capable node maintains its local SRDB. SRDB entries can + either derive from local policy or from protocol segment + advertisement. + +5. IPv6 Instantiation of Segment Routing + +5.1. Segment Identifiers (SIDs) and SRGB + + Segment Routing, as described in + [I-D.filsfils-spring-segment-routing], defines Node-SID and + Adjacency-SID. When SR is used over IPv6 data-plane the following + applies. + + The SRGB is the global IPv6 address space which represents the set of + IPv6 routable addresses in the SR domain. + + Node SIDs are IPv6 addresses part of the SRGB (i.e.: routable + addresses). Adjacency-SIDs are IPv6 addresses which may not be part + of the global IPv6 address space. + + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 10] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + +5.1.1. Node-SID + + The Node-SID identifies a node. With SR-IPv6 the Node-SID is an IPv6 + prefix that the operator configured on the node and that is used as + the node identifier. Typically, in case of a router, this is the + IPv6 address of the node loopback interface. Therefore, SR-IPv6 does + not require any additional SID advertisement for the Node Segment. + The Node-SID is in fact the IPv6 address of the node. + +5.1.2. Adjacency-SID + + In the SR architecture defined in + [I-D.filsfils-spring-segment-routing] the Adjacency-SID (or Adj-SID) + identifies a given interface and may be local or global (depending on + how it is advertised). A node may advertise one (or more) Adj-SIDs + allocated to a given interface so to force the forwarding of the + packet (when received with that particular Adj-SID) into the + interface regardless the routing entry for the packet destination. + The semantic of the Adj-SID is: + + Send out the packet to the interface this prefix is allocated to. + + When SR is applied to IPv6, any SID is in a global IPv6 address and + therefore, an Adj-SID has a global significance (i.e.: the IPv6 + address representing the SID is a global address). In other words, a + node that advertises the Adj-SID in the form of a global IPv6 address + representing the link/adjacency the packet has to be forwarded to, + will apply to the Adj-SID a global significance. + + Advertisement of Adj-SID may be done using multiple mechanisms among + which the ones described in ISIS and OSPF protocol extensions: + [I-D.ietf-isis-segment-routing-extensions] and + [I-D.psenak-ospf-segment-routing-ospfv3-extension]. The distinction + between local and global significance of the Adj-SID is given in the + encoding of the Adj-SID advertisement. + +5.2. Segment Routing Extension Header (SRH) + + A new type of the Routing Header (originally defined in [RFC2460]) is + defined: the Segment Routing Header (SRH) which has a new Routing + Type, (suggested value 4) to be assigned by IANA. + + As an example, if an explicit path is to be constructed across a core + network running ISIS or OSPF, the segment list will contain SIDs + representing the nodes across the path (loose or strict) which, + usually, are the IPv6 loopback interface address of each node. 
If + the path is across service or application entities, the segment list + + + + +Previdi, et al. Expires June 12, 2015 [Page 11] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + contains the IPv6 addresses of these services or application + instances. + + The Segment Routing Header (SRH) is defined as follows: + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next Header | Hdr Ext Len | Routing Type | Segments Left | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | First Segment | Flags | HMAC Key ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[0] (128 bits ipv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | | + ... + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[n] (128 bits ipv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Policy List[0] (optional) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Policy List[1] (optional) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Policy List[2] (optional) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | | + | | + | HMAC (256 bits) | + + + +Previdi, et al. Expires June 12, 2015 [Page 12] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + | (optional) | + | | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Next Header: 8-bit selector. Identifies the type of header + immediately following the SRH. + + o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH + header in 8-octet units, not including the first 8 octets. + + o Routing Type: TBD, to be assigned by IANA (suggested value: 4). + + o Segments Left. Defined in [RFC2460], it contains the index, in + the Segment List, of the next segment to inspect. Segments Left + is decremented at each segment and it is used as an index in the + segment list. + + o First Segment: offset in the SRH, not including the first 8 octets + and expressed in 16-octet units, pointing to the last element of + the segment list, which is in fact the first segment of the + segment routing path. + + o Flags: 16 bits of flags. Following flags are defined: + + 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |C|P|R|R| Policy Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + C-flag: Clean-up flag. Set when the SRH has to be removed from + the packet when packet reaches the last segment. + + P-flag: Protected flag. Set when the packet has been rerouted + through FRR mechanism by a SR endpoint node. See Section 6.3 + for more details. + + R-flags. Reserved and for future use. + + Policy Flags. Define the type of the IPv6 addresses encoded + into the Policy List (see below). The following have been + defined: + + + + + +Previdi, et al. Expires June 12, 2015 [Page 13] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + Bits 4-6: determine the type of the first element after the + segment list. + + Bits 7-9: determine the type of the second element. + + Bits 10-12: determine the type of the third element. + + Bits 13-15: determine the type of the fourth element. + + The following values are used for the type: + + 0x0: Not present. 
If value is set to 0x0, it means the + element represented by these bits is not present. + + 0x1: SR Ingress. + + 0x2: SR Egress. + + 0x3: Original Source Address. + + o HMAC Key ID and HMAC field, and their use are defined in + [I-D.vyncke-6man-segment-routing-security]. + + o Segment List[n]: 128 bit IPv6 addresses representing the nth + segment in the Segment List. The Segment List is encoded starting + from the last segment of the path. I.e., the first element of the + segment list (Segment List [0]) contains the last segment of the + path while the last segment of the Segment List (Segment List[n]) + contains the first segment of the path. The index contained in + "Segments Left" identifies the current active segment. + + o Policy List. Optional addresses representing specific nodes in + the SR path such as: + + SR Ingress: a 128 bit generic identifier representing the + ingress in the SR domain (i.e.: it needs not to be a valid IPv6 + address). + + SR Egress: a 128 bit generic identifier representing the egress + in the SR domain (i.e.: it needs not to be a valid IPv6 + address). + + Original Source Address: IPv6 address originally present in the + SA field of the packet. + + The segments in the Policy List are encoded after the segment list + and they are optional. If none are in the SRH, all bits of the + Policy List Flags MUST be set to 0x0. + + + +Previdi, et al. Expires June 12, 2015 [Page 14] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + +5.2.1. SRH and RFC2460 behavior + + The SRH being a new type of the Routing Header, it also has the same + properties: + + SHOULD only appear once in the packet. + + Only the router whose address is in the DA field of the packet + header MUST inspect the SRH. + + Therefore, Segment Routing in IPv6 networks implies that the segment + identifier (i.e.: the IPv6 address of the segment) is moved into the + DA of the packet. + + The DA of the packet changes at each segment termination/completion + and therefore the original DA of the packet MUST be encoded as the + last segment of the path. + + As illustrated in Section 3.2, nodes that are within the path of a + segment will forward packets based on the DA of the packet without + inspecting the SRH. This ensures full interoperability between SR- + capable and non-SR-capable nodes. + +6. SRH Procedures + + In this section we describe the different procedures on the SRH. + +6.1. Segment Routing Operations + + When Segment Routing is instantiated over the IPv6 data plane the + following applies: + + o The segment list is encoded in the SRH. + + o The active segment is in the destination address of the packet. + + o The Segment Routing CONTINUE operation (as described in + [I-D.filsfils-spring-segment-routing]) is implemented as a + regular/plain IPv6 operation consisting of DA based forwarding. + + o The NEXT operation is implemented through the update of the DA + with the value represented by the Next Segment field in the SRH. + + o The PUSH operation is implemented through the insertion of the SRH + or the insertion of additional segments in the SRH segment list. + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 15] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + +6.2. Segment Routing Node Functions + + SR packets are forwarded to segments endpoints (i.e.: nodes whose + address is in the DA field of the packet). The segment endpoint, + when receiving a SR packet destined to itself, does: + + o Inspect the SRH. 
+ + o Determine the next active segment. + + o Update the Segments Left field (or, if requested, remove the SRH + from the packet). + + o Update the DA. + + o Send the packet to the next segment. + + The procedures applied to the SRH are related to the node function. + Following nodes functions are defined: + + Ingress SR Node. + + Transit Non-SR Node. + + Transit SR Intra Segment Node. + + SR Endpoint Node. + +6.2.1. Ingress SR Node + + Ingress Node can be a router at the edge of the SR domain or a SR- + capable host. The ingress SR node may obtain the segment list by + either: + + Local path computation. + + Local configuration. + + Interaction with an SDN controller delivering the path as a + complete SRH. + + Any other mechanism (mechanisms through which the path is acquired + are outside the scope of this document). + + When creating the SRH (either at ingress node or in the SDN + controller) the following is done: + + Next Header and Hdr Ext Len fields are set according to [RFC2460]. + + + +Previdi, et al. Expires June 12, 2015 [Page 16] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + Routing Type field is set as TBD (SRH). + + The Segment List is built with the FIRST segment of the path + encoded in the LAST element of the Segment List. Subsequent + segments are encoded on top of the first segment. Finally, the + LAST segment of the path is encoded in the FIRST element of the + Segment List. In other words, the Segment List is encoded in the + reverse order of the path. + + The original DA of the packet is encoded as the last segment of + the path (encoded in the first element of the Segment List). + + the DA of the packet is set with the value of the first segment + (found in the last element of the segment list). + + the Segments Left field is set to n-1 where n is the number of + elements in the Segment List. + + The packet is sent out towards the first segment (i.e.: + represented in the packet DA). + +6.2.1.1. Security at Ingress + + The procedures related to the Segment Routing security are detailed + in [I-D.vyncke-6man-segment-routing-security]. + + In the case where the SR domain boundaries are not under control of + the network operator (e.g.: when the SR domain edge is in a home + network), it is important to authenticate and validate the content of + any SRH being received by the network operator. In such case, the + security procedure described in + [I-D.vyncke-6man-segment-routing-security] is to be used. + + The ingress node (e.g.: the host in the home network) requests the + SRH from a control system (e.g.: an SDN controller) which delivers + the SRH with its HMAC signature on it. + + Then, the home network host can send out SR packets (with an SRH on + it) that will be validated at the ingress of the network operator + infrastructure. + + The ingress node of the network operator infrastructure, is + configured in order to validate the incoming SRH HMACs in order to + allow only packets having correct SRH according to their SA/DA + addresses. + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 17] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + +6.2.2. Transit Non-SR Capable Node + + SR is interoperable with plain IPv6 forwarding. Any non SR-capable + node will forward SR packets solely based on the DA. There's no SRH + inspection. This ensures full interoperability between SR and non-SR + nodes. + +6.2.3. 
SR Intra Segment Transit Node + + Only the node whose address is in DA inspects and processes the SRH + (according to [RFC2460]). An intra segment transit node is not in + the DA and its forwarding is based on DA and its SR-IPv6 FIB. + +6.2.4. SR Segment Endpoint Node + + The SR segment endpoint node is the node whose address is in the DA. + The segment endpoint node inspects the SRH and does: + + 1. IF DA = myself (segment endpoint) + 2. IF Segments Left > 0 THEN + decrement Segments Left + update DA with Segment List[Segments Left] + 3. ELSE IF Segments List[Segments Left] <> DA THEN + update DA with Segments List[Segments Left] + IF Clean-up bit is set THEN remove the SRH + 4. ELSE give the packet to next PID (application) + End of processing. + 5. Forward the packet out + +6.3. FRR Flag Settings + + A node supporting SR and doing Fast Reroute (as described in + [I-D.filsfils-spring-segment-routing-use-cases], when rerouting + packets through FRR mechanisms, SHOULD inspect the rerouted packet + header and look for the SRH. If the SRH is present, the rerouting + node SHOULD set the Protected bit on all rerouted packets. + +7. SR and Tunneling + + Encapsulation can be realized in two different ways with SR-IPv6: + + Outer encapsulation. + + SRH with SA/DA original addresses. + + Outer encapsulation tunneling is the traditional method where an + additional IPv6 header is prepended to the packet. The original IPv6 + header being encapsulated, everything is preserved and the packet is + + + +Previdi, et al. Expires June 12, 2015 [Page 18] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + switched/routed according to the outer header (that could contain a + SRH). + + SRH allows encoding both original SA and DA, hence an operator may + decide to change the SA/DA at ingress and restore them at egress. + This can be achieved without outer encapsulation, by changing SA/DA + and encoding the original SA in the Policy List and in the original + DA in the Segment List. + +8. Example Use Case + + A more detailed description of use cases are available in + [I-D.ietf-spring-ipv6-use-cases]. In this section, a simple SR-IPv6 + example is illustrated. + + In the topology described in Figure 6 it is assumed an end-to-end SR + deployment. Therefore SR is supported by all nodes from A to J. + + Home Network | Backbone | Datacenter + | | + | +---+ +---+ +---+ | +---+ | + +---|---| C |---| D |---| E |---|---| I |---| + | | +---+ +---+ +---+ | +---+ | + | | | | | | | | +---+ + +---+ +---+ | | | | | | |--| X | + | A |---| B | | +---+ +---+ +---+ | +---+ | +---+ + +---+ +---+ | | F |---| G |---| H |---|---| J |---| + | +---+ +---+ +---+ | +---+ | + | | + | +-----------+ + | SDN | + | Orch/Ctlr | + +-----------+ + + Figure 6: Sample SR topology + + The following workflow applies to packets sent by host A and destined + to server X. + + + + + + + + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 19] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + . Host A sends a request for a path to server X to the SDN + controller or orchestration system. + + . The SDN controller/orchestrator builds a SRH with: + . Segment List: C, F, J, X + . HMAC + that satisfies the requirements expressed in the request + by host A and based on policies applicable to host A. + + . Host A receives the SRH and insert it into the packet. + The packet has now: + . SA: A + . DA: C + . SRH with + . SL: X, J, F, C + . Segments Left: 3 (i.e.: Segment List size - 1) + . 
PL: C (ingress), J (egress) + Note that X is the last segment and C is the + first segment (i.e.: the SL is encoded in the reverse + path order). + . HMAC + + . When packet arrives in C (first segment), C does: + . Validate the HMAC of the SRH. + . Decrement Segments Left by one: 2 + . Update the DA with the next segment found in + Segment List[2]. DA is set to F. + . Forward the packet to F. + + . When packet arrives in F (second segment), F does: + . Decrement Segments Left by one: 1 + . Update the DA with the next segment found in + Segment List[1]. DA is set to J. + . Forward the packet to J. + + . Packet travels across G and H nodes which do plain + IPv6 forwarding based on DA. No inspection of SRH needs + to be done in these nodes. However, any SR capable node + is allowed to set the Protected bit in case of FRR + protection. + + . When packet arrives in J (third segment), J does: + . Decrement Segments Left by one: 0 + . Update the DA with the next segment found in + Segment List[0]. DA is set to X. + . If the cleanup bit is set, then node J will strip out + the SRH from the packet. + . Forward the packet to X. + + + +Previdi, et al. Expires June 12, 2015 [Page 20] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + The packet arrives in the server that may or may not support SR. The + return traffic, from server to host, may be sent using the same + procedures. + +9. IANA Considerations + + TBD + +10. Manageability Considerations + + TBD + +11. Security Considerations + + Security mechanisms applied to Segment Routing over IPv6 networks are + detailed in [I-D.vyncke-6man-segment-routing-security]. + +12. Contributors + + The authors would like to thank Dave Barach, John Leddy, John + Brzozowski, Pierre Francois, Nagendra Kumar, Mark Townsley, Christian + Martin, Roberta Maglione, Eric Vyncke, James Connolly, David Lebrun + and Fred Baker for their contribution to this document. + +13. Acknowledgements + + TBD + +14. References + +14.1. Normative References + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, December 1998. + +14.2. Informative References + + [I-D.filsfils-spring-segment-routing] + Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., + Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., + Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe, + "Segment Routing Architecture", draft-filsfils-spring- + segment-routing-04 (work in progress), July 2014. + + + + + +Previdi, et al. Expires June 12, 2015 [Page 21] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + [I-D.filsfils-spring-segment-routing-mpls] + Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., + Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., + Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe, + "Segment Routing with MPLS data plane", draft-filsfils- + spring-segment-routing-mpls-03 (work in progress), August + 2014. + + [I-D.filsfils-spring-segment-routing-use-cases] + Filsfils, C., Francois, P., Previdi, S., Decraene, B., + Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., + Ytti, S., Henderickx, W., Tantsura, J., Kini, S., and E. + Crabbe, "Segment Routing Use Cases", draft-filsfils- + spring-segment-routing-use-cases-01 (work in progress), + October 2014. 
+ + [I-D.ietf-isis-segment-routing-extensions] + Previdi, S., Filsfils, C., Bashandy, A., Gredler, H., + Litkowski, S., Decraene, B., and J. Tantsura, "IS-IS + Extensions for Segment Routing", draft-ietf-isis-segment- + routing-extensions-03 (work in progress), October 2014. + + [I-D.ietf-spring-ipv6-use-cases] + Brzozowski, J., Leddy, J., Leung, I., Previdi, S., + Townsley, W., Martin, C., Filsfils, C., and R. Maglione, + "IPv6 SPRING Use Cases", draft-ietf-spring-ipv6-use- + cases-03 (work in progress), November 2014. + + [I-D.psenak-ospf-segment-routing-ospfv3-extension] + Psenak, P., Previdi, S., Filsfils, C., Gredler, H., + Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3 + Extensions for Segment Routing", draft-psenak-ospf- + segment-routing-ospfv3-extension-02 (work in progress), + July 2014. + + [I-D.vyncke-6man-segment-routing-security] + Vyncke, E. and S. Previdi, "IPv6 Segment Routing Header + (SRH) Security Considerations", July 2014. + + [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D. + Zappala, "Source Demand Routing: Packet Format and + Forwarding Specification (Version 1)", RFC 1940, May 1996. + +Authors' Addresses + + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 22] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + Stefano Previdi (editor) + Cisco Systems, Inc. + Via Del Serafico, 200 + Rome 00142 + Italy + + Email: sprevidi@cisco.com + + + Clarence Filsfils + Cisco Systems, Inc. + Brussels + BE + + Email: cfilsfil@cisco.com + + + Brian Field + Comcast + 4100 East Dry Creek Road + Centennial, CO 80122 + US + + Email: Brian_Field@cable.comcast.com + + + Ida Leung + Rogers Communications + 8200 Dixie Road + Brampton, ON L6T 0C1 + CA + + Email: Ida.Leung@rci.rogers.com diff --git a/vnet/vnet/sr/sr.c b/vnet/vnet/sr/sr.c new file mode 100644 index 00000000000..5cc33dc93e6 --- /dev/null +++ b/vnet/vnet/sr/sr.c @@ -0,0 +1,2228 @@ +/* + * sr.c: ipv6 segment routing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/sr/sr.h> + +#include <openssl/hmac.h> + +ip6_sr_main_t sr_main; +static vlib_node_registration_t sr_local_node; + +static void sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, + ip6_sr_header_t * sr) +{ + u32 key_index; + static u8 * keybuf; + u8 * copy_target; + int first_segment; + ip6_address_t *addrp; + int i; + ip6_sr_hmac_key_t * hmac_key; + u32 sig_len; + + key_index = sr->hmac_key; + + /* No signature? Pass... */ + if (key_index == 0) + return; + + /* We don't know about this key? Fail... 
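+     (keys are installed by find_or_add_shared_secret when a tunnel is
+     configured with a shared secret)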
*/ + if (key_index >= vec_len (sm->hmac_keys)) + return; + + hmac_key = sm->hmac_keys + key_index; + + vec_reset_length (keybuf); + + /* pkt ip6 src address */ + vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); + memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t)); + + /* first segment */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] = sr->first_segment; + + /* octet w/ bit 0 = "clean" flag */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] + = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)) + ? 0x80 : 0; + + /* hmac key id */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] = sr->hmac_key; + + first_segment = sr->first_segment; + + addrp = sr->segments; + + /* segments */ + for (i = 0; i <= first_segment; i++) + { + vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); + memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t)); + addrp++; + } + + addrp++; + + HMAC_CTX_init(sm->hmac_ctx); + if (!HMAC_Init(sm->hmac_ctx, hmac_key->shared_secret, + vec_len(hmac_key->shared_secret),sm->md)) + clib_warning ("barf1"); + if (!HMAC_Update(sm->hmac_ctx,keybuf,vec_len(keybuf))) + clib_warning ("barf2"); + if (!HMAC_Final(sm->hmac_ctx, (unsigned char *) addrp, &sig_len)) + clib_warning ("barf3"); + HMAC_CTX_cleanup(sm->hmac_ctx); +} + +u8 * format_ip6_sr_header_flags (u8 * s, va_list * args) +{ + u16 flags = (u16) va_arg (*args, int); + u8 pl_flag; + int bswap_needed = va_arg (*args, int); + int i; + + if (bswap_needed) + flags = clib_host_to_net_u16 (flags); + + if (flags & IP6_SR_HEADER_FLAG_CLEANUP) + s = format (s, "cleanup "); + + if (flags & IP6_SR_HEADER_FLAG_PROTECTED) + s = format (s, "reroute "); + + s = format (s, "pl: "); + for (i = 1; i <= 4; i++) + { + pl_flag = ip6_sr_policy_list_flags (flags, i); + s = format (s, "[%d] ", i); + + switch (pl_flag) + { + case IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT: + s = format (s, "NotPr "); + break; + case IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE: + s = format (s, "InPE "); + break; + case IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE: + s = format (s, "EgPE "); + break; + + case IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR: + s = format (s, "OrgSrc "); + break; + } + } + return s; +} + +u8 * format_ip6_sr_header (u8 * s, va_list * args) +{ + ip6_sr_header_t * h = va_arg (*args, ip6_sr_header_t *); + int print_hmac = va_arg (*args, int); + int i, pl_index, max_segs; + int flags_host_byte_order = clib_net_to_host_u16(h->flags); + + s = format (s, "next proto %d, len %d, type %d", + h->protocol, (h->length<<3)+8, h->type); + s = format (s, "\n segs left %d, first_segment %d, hmac key %d", + h->segments_left, h->first_segment, h->hmac_key); + s = format (s, "\n flags %U", format_ip6_sr_header_flags, + flags_host_byte_order, 0 /* bswap needed */ ); + + /* + * Header length is in 8-byte units (minus one), so + * divide by 2 to ascertain the number of ip6 addresses in the + * segment list + */ + max_segs = (h->length>>1); + + if (!print_hmac && h->hmac_key) + max_segs -= 2; + + s = format (s, "\n Segments (in processing order):"); + + for (i = h->first_segment; i >= 0; i--) + s = format (s, "\n %U", format_ip6_address, h->segments + i); + + s = format (s, "\n Policy List:"); + + pl_index = 1; /* to match the RFC text */ + for (i = (h->first_segment+1); i < max_segs; i++, pl_index++) + { + char * tag; + char * tags[] = {" ", "InPE: ", "EgPE: ", "OrgSrc: "}; + + tag = tags[0]; + if (pl_index >=1 && pl_index <= 4) + { + int this_pl_flag = ip6_sr_policy_list_flags + (flags_host_byte_order, pl_index); + tag = 
tags[this_pl_flag]; + } + + s = format (s, "\n %s%U", tag, format_ip6_address, h->segments + i); + } + + return s; +} + +u8 * format_ip6_sr_header_with_length (u8 * s, va_list * args) +{ + ip6_header_t * h = va_arg (*args, ip6_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + uword header_bytes; + + header_bytes = sizeof (h[0]) + sizeof (ip6_sr_header_t); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "ip6_sr header truncated"); + + s = format (s, "IP6: %U\n", format_ip6_header, h, max_header_bytes); + s = format (s, "SR: %U\n", format_ip6_sr_header, (ip6_sr_header_t *)(h+1), + 0 /* print_hmac */, max_header_bytes); + return s; +} + +#define foreach_sr_rewrite_next \ +_(ERROR, "error-drop") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(SR_LOCAL, "sr-local") + +typedef enum { +#define _(s,n) SR_REWRITE_NEXT_##s, + foreach_sr_rewrite_next +#undef _ + SR_REWRITE_N_NEXT, +} sr_rewrite_next_t; + +typedef struct { + ip6_address_t src, dst; + u16 length; + u32 next_index; + u32 tunnel_index; + u8 sr[256]; +} sr_rewrite_trace_t; + +static char * sr_rewrite_error_strings[] = { +#define sr_error(n,s) s, +#include "sr_error.def" +#undef sr_error +}; + +typedef enum { +#define sr_error(n,s) SR_REWRITE_ERROR_##n, +#include "sr_error.def" +#undef sr_error + SR_REWRITE_N_ERROR, +} sr_rewrite_error_t; + + +u8 * format_sr_rewrite_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_rewrite_trace_t * t = va_arg (*args, sr_rewrite_trace_t *); + ip6_main_t * im = &ip6_main; + ip6_sr_main_t * sm = &sr_main; + ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); + ip6_fib_t * rx_fib, * tx_fib; + + rx_fib = find_ip6_fib_by_table_index_or_id (im, tun->rx_fib_index, + IP6_ROUTE_FLAG_FIB_INDEX); + + tx_fib = find_ip6_fib_by_table_index_or_id (im, tun->tx_fib_index, + IP6_ROUTE_FLAG_FIB_INDEX); + + s = format + (s, "SR-REWRITE: next %s ip6 src %U dst %U len %u\n" + " rx-fib-id %d tx-fib-id %d\n%U", + (t->next_index == SR_REWRITE_NEXT_SR_LOCAL) + ? "sr-local" : "ip6-lookup", + format_ip6_address, &t->src, + format_ip6_address, &t->dst, t->length, + rx_fib->table_id, tx_fib->table_id, + format_ip6_sr_header, t->sr, 0 /* print_hmac */); + return s; +} + +static uword +sr_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip6_sr_main_t * sm = &sr_main; + u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, + vlib_buffer_t *, ip6_header_t *, + ip6_sr_header_t *); + sr_local_cb = sm->sr_local_cb; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ip6_header_t * ip0, * ip1; + ip_adjacency_t * adj0, * adj1; + ip6_sr_header_t * sr0, * sr1; + ip6_sr_tunnel_t * t0, *t1; + u64 * copy_src0, * copy_dst0; + u64 * copy_src1, * copy_dst1; + u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP; + u32 next1 = SR_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0, new_l1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* + * $$$ parse through header(s) to pick the point + * where we punch in the SR extention header + */ + + adj0 = ip_get_adjacency (lm, vnet_buffer(b0)->ip.adj_index[VLIB_TX]); + adj1 = ip_get_adjacency (lm, vnet_buffer(b1)->ip.adj_index[VLIB_TX]); + t0 = pool_elt_at_index (sm->tunnels, + adj0->rewrite_header.sw_if_index); + t1 = pool_elt_at_index (sm->tunnels, + adj1->rewrite_header.sw_if_index); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE + >= ((word) vec_len (t0->rewrite)) + b0->current_data); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE + >= ((word) vec_len (t1->rewrite)) + b1->current_data); + + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->tx_fib_index; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* + * SR-unaware service chaining case: pkt coming back from + * service has the original dst address, and will already + * have an SR header. If so, send it to sr-local + */ + if (PREDICT_FALSE(ip0->protocol == 43)) + { + vlib_buffer_advance (b0, sizeof(ip0)); + sr0 = (ip6_sr_header_t *) (ip0+1); + new_l0 = clib_net_to_host_u16(ip0->payload_length); + next0 = SR_REWRITE_NEXT_SR_LOCAL; + } + else + { + copy_dst0 = (u64 *)(((u8 *)ip0) - vec_len (t0->rewrite)); + copy_src0 = (u64 *) ip0; + + /* + * Copy data before the punch-in point left by the + * required amount. Assume (for the moment) that only + * the main packet header needs to be copied. + */ + copy_dst0 [0] = copy_src0 [0]; + copy_dst0 [1] = copy_src0 [1]; + copy_dst0 [2] = copy_src0 [2]; + copy_dst0 [3] = copy_src0 [3]; + copy_dst0 [4] = copy_src0 [4]; + vlib_buffer_advance (b0, - (word) vec_len(t0->rewrite)); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *) (ip0+1); + /* $$$ tune */ + memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); + /* Fix the next header chain */ + sr0->protocol = ip0->protocol; + ip0->protocol = 43; /* routing extension header */ + new_l0 = clib_net_to_host_u16(ip0->payload_length) + + vec_len (t0->rewrite); + ip0->payload_length = clib_host_to_net_u16(new_l0); + /* Rewrite the ip6 dst address */ + ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0]; + ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1]; + + sr_fix_hmac (sm, ip0, sr0); + + next0 = sr_local_cb ? 
sr_local_cb (vm, node, b0, ip0, sr0) : + next0; + + /* + * Ignore "do not rewrite" shtik in this path + */ + if (PREDICT_FALSE (next0 & 0x80000000)) + { + next0 ^= 0xFFFFFFFF; + if (PREDICT_FALSE(next0 == SR_REWRITE_NEXT_ERROR)) + b0->error = + node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; + } + } + + if (PREDICT_FALSE(ip1->protocol == 43)) + { + vlib_buffer_advance (b1, sizeof(ip1)); + sr1 = (ip6_sr_header_t *) (ip1+1); + new_l1 = clib_net_to_host_u16(ip1->payload_length); + next1 = SR_REWRITE_NEXT_SR_LOCAL; + } + else + { + copy_dst1 = (u64 *)(((u8 *)ip1) - vec_len (t1->rewrite)); + copy_src1 = (u64 *) ip1; + + copy_dst1 [0] = copy_src1 [0]; + copy_dst1 [1] = copy_src1 [1]; + copy_dst1 [2] = copy_src1 [2]; + copy_dst1 [3] = copy_src1 [3]; + copy_dst1 [4] = copy_src1 [4]; + vlib_buffer_advance (b1, - (word) vec_len(t1->rewrite)); + ip1 = vlib_buffer_get_current (b1); + sr1 = (ip6_sr_header_t *) (ip1+1); + memcpy (sr1, t1->rewrite, vec_len (t1->rewrite)); + sr1->protocol = ip1->protocol; + ip1->protocol = 43; + new_l1 = clib_net_to_host_u16(ip1->payload_length) + + vec_len (t1->rewrite); + ip1->payload_length = clib_host_to_net_u16(new_l1); + ip1->dst_address.as_u64[0] = t1->first_hop.as_u64[0]; + ip1->dst_address.as_u64[1] = t1->first_hop.as_u64[1]; + + sr_fix_hmac (sm, ip1, sr1); + + next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) : + next1; + + /* + * Ignore "do not rewrite" shtik in this path + */ + if (PREDICT_FALSE (next1 & 0x80000000)) + { + next1 ^= 0xFFFFFFFF; + if (PREDICT_FALSE(next1 == SR_REWRITE_NEXT_ERROR)) + b1->error = + node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; + } + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_index = t0 - sm->tunnels; + memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = new_l0; + tr->next_index = next0; + memcpy (tr->sr, sr0, sizeof (tr->sr)); + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->tunnel_index = t1 - sm->tunnels; + memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = new_l1; + tr->next_index = next1; + memcpy (tr->sr, sr1, sizeof (tr->sr)); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + ip6_header_t * ip0; + ip_adjacency_t * adj0; + ip6_sr_header_t * sr0; + ip6_sr_tunnel_t * t0; + u64 * copy_src0, * copy_dst0; + u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* + * $$$ parse through header(s) to pick the point + * where we punch in the SR extention header + */ + + adj0 = ip_get_adjacency (lm, vnet_buffer(b0)->ip.adj_index[VLIB_TX]); + t0 = pool_elt_at_index (sm->tunnels, + adj0->rewrite_header.sw_if_index); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE + >= ((word) vec_len (t0->rewrite)) + b0->current_data); + + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; + + ip0 = vlib_buffer_get_current (b0); + + /* + * SR-unaware service chaining case: pkt coming back from + * service has the original dst 
address, and will already + * have an SR header. If so, send it to sr-local + */ + if (PREDICT_FALSE(ip0->protocol == 43)) + { + vlib_buffer_advance (b0, sizeof(ip0)); + sr0 = (ip6_sr_header_t *) (ip0+1); + new_l0 = clib_net_to_host_u16(ip0->payload_length); + next0 = SR_REWRITE_NEXT_SR_LOCAL; + } + else + { + copy_dst0 = (u64 *)(((u8 *)ip0) - vec_len (t0->rewrite)); + copy_src0 = (u64 *) ip0; + + /* + * Copy data before the punch-in point left by the + * required amount. Assume (for the moment) that only + * the main packet header needs to be copied. + */ + copy_dst0 [0] = copy_src0 [0]; + copy_dst0 [1] = copy_src0 [1]; + copy_dst0 [2] = copy_src0 [2]; + copy_dst0 [3] = copy_src0 [3]; + copy_dst0 [4] = copy_src0 [4]; + vlib_buffer_advance (b0, - (word) vec_len(t0->rewrite)); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *) (ip0+1); + /* $$$ tune */ + memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); + /* Fix the next header chain */ + sr0->protocol = ip0->protocol; + ip0->protocol = 43; /* routing extension header */ + new_l0 = clib_net_to_host_u16(ip0->payload_length) + + vec_len (t0->rewrite); + ip0->payload_length = clib_host_to_net_u16(new_l0); + /* Rewrite the ip6 dst address */ + ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0]; + ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1]; + + sr_fix_hmac (sm, ip0, sr0); + + next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) : + next0; + + /* + * Ignore "do not rewrite" shtik in this path + */ + if (PREDICT_FALSE (next0 & 0x80000000)) + { + next0 ^= 0xFFFFFFFF; + if (PREDICT_FALSE(next0 == SR_REWRITE_NEXT_ERROR)) + b0->error = + node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; + } + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_index = t0 - sm->tunnels; + memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = new_l0; + tr->next_index = next0; + memcpy (tr->sr, sr0, sizeof (tr->sr)); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (sr_rewrite_node) = { + .function = sr_rewrite, + .name = "sr-rewrite", + /* Takes a vector of packets. 
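+     Dispositions: ip6-lookup for freshly rewritten packets, sr-local for
+     packets that already carry an SR header, error-drop on error.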
*/ + .vector_size = sizeof (u32), + .format_trace = format_sr_rewrite_trace, + .format_buffer = format_ip6_sr_header_with_length, + + .n_errors = SR_REWRITE_N_ERROR, + .error_strings = sr_rewrite_error_strings, + + .runtime_data_bytes = 0, + + .n_next_nodes = SR_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_REWRITE_NEXT_##s] = n, + foreach_sr_rewrite_next +#undef _ + }, +}; + +static int ip6_delete_route_no_next_hop (ip6_address_t *dst_address_arg, + u32 dst_address_length, + u32 rx_table_id) +{ + ip6_add_del_route_args_t a; + ip6_address_t dst_address; + ip6_fib_t * fib; + ip6_main_t * im6 = &ip6_main; + BVT(clib_bihash_kv) kv, value; + + fib = find_ip6_fib_by_table_index_or_id (im6, rx_table_id, + IP6_ROUTE_FLAG_TABLE_ID); + memset (&a, 0, sizeof (a)); + a.flags |= IP4_ROUTE_FLAG_DEL; + a.dst_address_length = dst_address_length; + + dst_address = *dst_address_arg; + + ip6_address_mask (&dst_address, + &im6->fib_masks[dst_address_length]); + + kv.key[0] = dst_address.as_u64[0]; + kv.key[1] = dst_address.as_u64[1]; + kv.key[2] = ((u64)((fib - im6->fibs))<<32) | dst_address_length; + + if (BV(clib_bihash_search)(&im6->ip6_lookup_table, &kv, &value) < 0) + { + clib_warning ("%U/%d not in FIB", + format_ip6_address, &a.dst_address, + a.dst_address_length); + return -10; + } + + a.adj_index = value.value; + a.dst_address = dst_address; + + ip6_add_del_route (im6, &a); + ip6_maybe_remap_adjacencies (im6, rx_table_id, IP6_ROUTE_FLAG_TABLE_ID); + return 0; +} + +static ip6_sr_hmac_key_t * +find_or_add_shared_secret (ip6_sr_main_t * sm, u8 * secret, u32 * indexp) +{ + uword * p; + ip6_sr_hmac_key_t * key = 0; + int i; + + p = hash_get_mem (sm->hmac_key_by_shared_secret, secret); + + if (p) + { + key = vec_elt_at_index (sm->hmac_keys, p[0]); + if (indexp) + *indexp = p[0]; + return (key); + } + + /* Specific key ID? 
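+     If the caller passed a nonzero index, use that slot directly;
+     otherwise scan for a free slot, growing the vector if necessary.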
*/ + if (indexp && *indexp) + { + vec_validate (sm->hmac_keys, *indexp); + key = sm->hmac_keys + *indexp; + } + else + { + for (i = 0; i < vec_len (sm->hmac_keys); i++) + { + if (sm->hmac_keys[i].shared_secret == 0) + key = sm->hmac_keys + i; + goto found; + } + vec_validate (sm->hmac_keys, i); + key = sm->hmac_keys + i; + found: + ; + } + + key->shared_secret = vec_dup (secret); + + hash_set_mem (sm->hmac_key_by_shared_secret, key->shared_secret, + key - sm->hmac_keys); + + if (indexp) + *indexp = key - sm->hmac_keys; + return (key); +} + +int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) +{ + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + ip6_sr_tunnel_key_t key; + ip6_sr_tunnel_t * t; + uword * p; + ip6_sr_header_t * h = 0; + u32 header_length; + ip6_address_t * addrp, *this_address; + ip_adjacency_t adj, * ap, * add_adj = 0; + u32 adj_index; + ip6_sr_main_t * sm = &sr_main; + u8 * key_copy; + u32 rx_fib_index, tx_fib_index; + ip6_add_del_route_args_t aa; + u32 hmac_key_index_u32; + u8 hmac_key_index = 0; + + /* Make sure that the rx FIB exists */ + p = hash_get (im->fib_index_by_table_id, a->rx_table_id); + + if (p == 0) + return -3; + + /* remember the FIB index */ + rx_fib_index = p[0]; + + /* Make sure that the supplied FIB exists */ + p = hash_get (im->fib_index_by_table_id, a->tx_table_id); + + if (p == 0) + return -4; + + /* remember the FIB index */ + tx_fib_index = p[0]; + + memcpy (key.src.as_u8, a->src_address->as_u8, sizeof (key.src)); + memcpy (key.dst.as_u8, a->dst_address->as_u8, sizeof (key.dst)); + + p = hash_get_mem (sm->tunnel_index_by_key, &key); + + if (p) + { + if (a->is_del) + { + hash_pair_t *hp; + + /* Delete existing tunnel */ + t = pool_elt_at_index (sm->tunnels, p[0]); + + ip6_delete_route_no_next_hop (&t->key.dst, t->dst_mask_width, + a->rx_table_id); + vec_free (t->rewrite); + pool_put (sm->tunnels, t); + hp = hash_get_pair (sm->tunnel_index_by_key, &key); + key_copy = (void *)(hp->key); + hash_unset_mem (sm->tunnel_index_by_key, &key); + vec_free (key_copy); + return 0; + } + else /* create; tunnel already exists; complain */ + return -1; + } + else + { + /* delete; tunnel does not exist; complain */ + if (a->is_del) + return -2; + } + + /* create a new tunnel */ + pool_get (sm->tunnels, t); + memset (t, 0, sizeof (*t)); + + memcpy (&t->key, &key, sizeof (t->key)); + t->dst_mask_width = a->dst_mask_width; + t->rx_fib_index = rx_fib_index; + t->tx_fib_index = tx_fib_index; + + /* The first specified hop goes right into the dst address */ + if (vec_len(a->segments)) + { + t->first_hop = a->segments[0]; + /* It won't feel nice if we do it twice */ + vec_delete (a->segments, 1, 0); + } + else /* there must be at least one segment... */ + return -4; + + /* + * Create the sr header rewrite string + * We append the dst address to the set of next hops + * so the ultimate recipient can tell where the + * packet entered the SR domain + */ + header_length = sizeof (*h) + + sizeof (ip6_address_t) * (vec_len (a->segments) + vec_len (a->tags)); + + if (a->shared_secret) + { + /* Allocate a new key slot if we don't find the secret key */ + hmac_key_index_u32 = 0; + (void) find_or_add_shared_secret (sm, a->shared_secret, + &hmac_key_index_u32); + + /* Hey Vinz Clortho: Gozzer is pissed.. you're out of keys! 
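+     (the SRH hmac_key field is a single octet, so only 256 key IDs exist)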
*/ + if (hmac_key_index_u32 >= 256) + return -5; + hmac_key_index = hmac_key_index_u32; + header_length += SHA256_DIGEST_LENGTH; + } + + vec_validate (t->rewrite, header_length-1); + + h = (ip6_sr_header_t *) t->rewrite; + + h->protocol = 0xFF; /* we don't know yet */ + + h->length = (header_length/8) - 1; + h->type = ROUTING_HEADER_TYPE_SR; + h->segments_left = vec_len (a->segments); + h->first_segment = vec_len(a->segments) -1; + if (a->shared_secret) + h->hmac_key = hmac_key_index & 0xFF; + + h->flags = a->flags_net_byte_order; + + /* Paint on the segment list, in reverse */ + addrp = h->segments + (vec_len (a->segments) - 1); + + vec_foreach (this_address, a->segments) + { + memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + + /* Paint on the tag list, not reversed */ + addrp = h->segments + vec_len(a->segments); + + vec_foreach (this_address, a->tags) + { + memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp++; + } + + key_copy = vec_new (ip6_sr_tunnel_key_t, 1); + memcpy (key_copy, &key, sizeof (ip6_sr_tunnel_key_t)); + hash_set_mem (sm->tunnel_index_by_key, key_copy, t - sm->tunnels); + + memset(&adj, 0, sizeof (adj)); + + /* Create an adjacency and add to v6 fib */ + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + adj.lookup_next_index = sm->ip6_lookup_sr_next_index; + adj.explicit_fib_index = ~0; + + ap = ip_add_adjacency (lm, &adj, 1 /* one adj */, + &adj_index); + + /* + * Stick the tunnel index into the rewrite header. + * + * Unfortunately, inserting an SR header according to the various + * RFC's requires parsing through the ip6 header, perhaps consing a + * buffer onto the head of the vlib_buffer_t, etc. We don't use the + * normal reverse bcopy rewrite code. + * + * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain + * at some point... + */ + ap->rewrite_header.sw_if_index = t - sm->tunnels; + + vec_add1 (add_adj, ap[0]); + + memcpy (aa.dst_address.as_u8, a->dst_address, sizeof (aa.dst_address.as_u8)); + aa.dst_address_length = a->dst_mask_width; + + aa.flags = (a->is_del ? 
IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD); + aa.flags |= IP6_ROUTE_FLAG_FIB_INDEX; + aa.table_index_or_table_id = rx_fib_index; + aa.add_adj = add_adj; + aa.adj_index = adj_index; + aa.n_add_adj = 1; + ip6_add_del_route (im, &aa); + vec_free (add_adj); + + return 0; +} + +static clib_error_t * +sr_add_del_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0; + ip6_address_t src_address; + int src_address_set = 0; + ip6_address_t dst_address; + u32 dst_mask_width; + int dst_address_set = 0; + u16 flags = 0; + u8 *shared_secret = 0; + u32 rx_table_id = 0; + u32 tx_table_id = 0; + ip6_address_t * segments = 0; + ip6_address_t * this_seg; + ip6_address_t * tags = 0; + ip6_address_t * this_tag; + ip6_sr_add_del_tunnel_args_t _a, *a=&_a; + ip6_address_t next_address, tag; + int pl_index; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (unformat (input, "rx-fib-id %d", &rx_table_id)) + ; + else if (unformat (input, "tx-fib-id %d", &tx_table_id)) + ; + else if (unformat (input, "src %U", unformat_ip6_address, &src_address)) + src_address_set = 1; + else if (unformat (input, "dst %U/%d", + unformat_ip6_address, &dst_address, + &dst_mask_width)) + dst_address_set = 1; + else if (unformat (input, "next %U", unformat_ip6_address, + &next_address)) + { + vec_add2 (segments, this_seg, 1); + memcpy (this_seg->as_u8, next_address.as_u8, sizeof (*this_seg)); + } + else if (unformat (input, "tag %U", unformat_ip6_address, + &tag)) + { + vec_add2 (tags, this_tag, 1); + memcpy (this_tag->as_u8, tag.as_u8, sizeof (*this_tag)); + } + else if (unformat (input, "clean")) + flags |= IP6_SR_HEADER_FLAG_CLEANUP; + else if (unformat (input, "protected")) + flags |= IP6_SR_HEADER_FLAG_PROTECTED; + else if (unformat (input, "key %s", &shared_secret)) + /* Do not include the trailing NULL byte. 
Guaranteed interop issue */ + _vec_len (shared_secret) -= 1; + else if (unformat (input, "InPE %d", &pl_index)) + { + if (pl_index <= 0 || pl_index > 4) + { + pl_index_range_error: + return clib_error_return + (0, "Policy List Element Index %d out of range (1-4)", pl_index); + + } + flags |= IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE + << ip6_sr_policy_list_shift_from_index (pl_index); + } + else if (unformat (input, "EgPE %d", &pl_index)) + { + if (pl_index <= 0 || pl_index > 4) + goto pl_index_range_error; + flags |= IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE + << ip6_sr_policy_list_shift_from_index (pl_index); + } + else if (unformat (input, "OrgSrc %d", &pl_index)) + { + if (pl_index <= 0 || pl_index > 4) + goto pl_index_range_error; + flags |= IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR + << ip6_sr_policy_list_shift_from_index (pl_index); + } + else + break; + } + + if (!src_address_set) + return clib_error_return (0, "src address required"); + + if (!dst_address_set) + return clib_error_return (0, "dst address required"); + + if (!segments) + return clib_error_return (0, "at least one sr segment required"); + + memset (a, 0, sizeof (*a)); + a->src_address = &src_address; + a->dst_address = &dst_address; + a->dst_mask_width = dst_mask_width; + a->segments = segments; + a->tags = tags; + a->flags_net_byte_order = clib_host_to_net_u16(flags); + a->is_del = is_del; + a->rx_table_id = rx_table_id; + a->tx_table_id = tx_table_id; + a->shared_secret = shared_secret; + + rv = ip6_sr_add_del_tunnel (a); + + vec_free (segments); + vec_free (tags); + vec_free (shared_secret); + + switch (rv) + { + case 0: + break; + + case -1: + return clib_error_return (0, "SR tunnel src %U dst %U already exists", + format_ip6_address, &src_address, + format_ip6_address, &dst_address); + + case -2: + return clib_error_return (0, "SR tunnel src %U dst %U does not exist", + format_ip6_address, &src_address, + format_ip6_address, &dst_address); + + case -3: + return clib_error_return (0, "FIB table %d does not exist", rx_table_id); + + case -4: + return clib_error_return (0, "At least one segment is required"); + + default: + return clib_error_return (0, "BUG: ip6_sr_add_del_tunnel returns %d", + rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (sr_tunnel_command, static) = { + .path = "sr tunnel", + .short_help = + "sr tunnel [del] <src> <dst> [next <addr>] [cleanup] [reroute] [key %s]", + .function = sr_add_del_tunnel_command_fn, +}; + + +static clib_error_t * +show_sr_tunnel_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + static ip6_sr_tunnel_t ** tunnels; + ip6_sr_tunnel_t * t; + ip6_sr_main_t * sm = &sr_main; + ip6_main_t * im = &ip6_main; + ip6_fib_t * rx_fib, * tx_fib; + int i; + + vec_reset_length (tunnels); + + pool_foreach (t, sm->tunnels, + ({ + vec_add1 (tunnels, t); + })); + + if (vec_len (tunnels) == 0) + vlib_cli_output (vm, "No SR tunnels configured"); + + for (i = 0; i < vec_len (tunnels); i++) + { + t = tunnels [i]; + + rx_fib = find_ip6_fib_by_table_index_or_id (im, t->rx_fib_index, + IP6_ROUTE_FLAG_FIB_INDEX); + + tx_fib = find_ip6_fib_by_table_index_or_id (im, t->tx_fib_index, + IP6_ROUTE_FLAG_FIB_INDEX); + + vlib_cli_output (vm, "src %U dst %U first hop %U", + format_ip6_address, &t->key.src, + format_ip6_address, &t->key.dst, + format_ip6_address, &t->first_hop); + vlib_cli_output (vm, " rx-fib-id %d tx-fib-id %d", + rx_fib->table_id, tx_fib->table_id); + vlib_cli_output (vm, " sr: %U", format_ip6_sr_header, t->rewrite, + 0 /* print_hmac */); + vlib_cli_output (vm, 
"-------"); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_sr_tunnel_command, static) = { + .path = "show sr tunnel", + .short_help = "show sr tunnel", + .function = show_sr_tunnel_fn, +}; + +#define foreach_sr_fix_dst_addr_next \ +_(DROP, "error-drop") + +typedef enum { +#define _(s,n) SR_FIX_DST_ADDR_NEXT_##s, +foreach_sr_fix_dst_addr_next +#undef _ + SR_FIX_DST_ADDR_N_NEXT, +} sr_fix_dst_addr_next_t; + +static char * sr_fix_dst_error_strings[] = { +#define sr_fix_dst_error(n,s) s, +#include "sr_fix_dst_error.def" +#undef sr_fix_dst_error +}; + +typedef enum { +#define sr_fix_dst_error(n,s) SR_FIX_DST_ERROR_##n, +#include "sr_fix_dst_error.def" +#undef sr_fix_dst_error + SR_FIX_DST_N_ERROR, +} sr_fix_dst_error_t; + +typedef struct { + ip6_address_t src, dst; + u32 next_index; + u32 adj_index; + u8 sr[256]; +} sr_fix_addr_trace_t; + +u8 * format_sr_fix_addr_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_fix_addr_trace_t * t = va_arg (*args, sr_fix_addr_trace_t *); + vnet_hw_interface_t * hi = 0; + ip_adjacency_t * adj; + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + vnet_main_t * vnm = vnet_get_main(); + + if (t->adj_index != ~0) + { + adj = ip_get_adjacency (lm, t->adj_index); + hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); + } + + s = format (s, "SR-FIX_ADDR: next %s ip6 src %U dst %U\n", + (t->next_index == SR_FIX_DST_ADDR_NEXT_DROP) + ? "drop" : "output", + format_ip6_address, &t->src, + format_ip6_address, &t->dst); + if (t->next_index != SR_FIX_DST_ADDR_NEXT_DROP) + { + s = format (s, "%U\n", format_ip6_sr_header, t->sr, 1 /* print_hmac */); + s = format (s, " output via %s", hi ? (char *)(hi->name) + : "Invalid adj"); + } + return s; +} + +static uword +sr_fix_dst_addr (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + __attribute__((unused)) vlib_buffer_t * b0, * b1; + u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP; + u32 next1 = SR_FIX_DST_ADDR_NEXT_DROP; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + ip6_header_t * ip0; + ip_adjacency_t * adj0; + ip6_sr_header_t * sr0; + u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP; + ip6_address_t *new_dst0; + ethernet_header_t * eh0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + adj0 = ip_get_adjacency (lm, vnet_buffer(b0)->ip.adj_index[VLIB_TX]); + next0 = adj0->mcast_group_index; + + /* We should be pointing at an Ethernet header... */ + eh0 = vlib_buffer_get_current (b0); + ip0 = (ip6_header_t *)(eh0+1); + sr0 = (ip6_sr_header_t *) (ip0+1); + + /* We'd better find an SR header... */ + if (PREDICT_FALSE(ip0->protocol != 43)) + { + b0->error = node->errors[SR_FIX_DST_ERROR_NO_SR_HEADER]; + goto do_trace0; + } + else + { + /* + * We get here from sr_rewrite or sr_local, with + * sr->segments_left pointing at the (copy of the original) dst + * address. Use it, then increment sr0->segments_left. + */ + + /* Out of segments? Turf the packet */ + if (PREDICT_FALSE (sr0->segments_left == 0)) + { + b0->error = node->errors[SR_FIX_DST_ERROR_NO_MORE_SEGMENTS]; + goto do_trace0; + } + + /* + * Rewrite the packet with the original dst address + * We assume that the last segment (in processing order) contains + * the original dst address. The list is reversed, so sr0->segments + * contains the original dst address. + */ + new_dst0 = sr0->segments; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + } + + do_trace0: + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_fix_addr_trace_t *t = vlib_add_trace (vm, node, + b0, sizeof (*t)); + t->next_index = next0; + t->adj_index = ~0; + + if (next0 != SR_FIX_DST_ADDR_NEXT_DROP) + { + t->adj_index = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + memcpy (t->src.as_u8, ip0->src_address.as_u8, + sizeof (t->src.as_u8)); + memcpy (t->dst.as_u8, ip0->dst_address.as_u8, + sizeof (t->dst.as_u8)); + memcpy (t->sr, sr0, sizeof (t->sr)); + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + + +VLIB_REGISTER_NODE (sr_fix_dst_addr_node) = { + .function = sr_fix_dst_addr, + .name = "sr-fix-dst-addr", + /* Takes a vector of packets. 
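+     The only static next node is error-drop; forwarded packets use the
+     disposition stored in the adjacency's mcast_group_index.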
*/ + .vector_size = sizeof (u32), + .format_trace = format_sr_fix_addr_trace, + .format_buffer = format_ip6_sr_header_with_length, + + .runtime_data_bytes = 0, + + .n_errors = SR_FIX_DST_N_ERROR, + .error_strings = sr_fix_dst_error_strings, + + .n_next_nodes = SR_FIX_DST_ADDR_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_FIX_DST_ADDR_NEXT_##s] = n, + foreach_sr_fix_dst_addr_next +#undef _ + }, +}; + +static clib_error_t * sr_init (vlib_main_t * vm) +{ + ip6_sr_main_t * sm = &sr_main; + clib_error_t * error = 0; + vlib_node_t * ip6_lookup_node, * ip6_rewrite_node; + vlib_node_t * ip6_rewrite_local_node; + u32 verify_next_index; + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + sm->vlib_main = vm; + sm->vnet_main = vnet_get_main(); + + vec_validate (sm->hmac_keys, 0); + sm->hmac_keys[0].shared_secret = (u8 *) 0xdeadbeef; + + sm->tunnel_index_by_key = + hash_create_mem (0, sizeof (ip6_sr_tunnel_key_t), sizeof (uword)); + + sm->hmac_key_by_shared_secret = hash_create_string (0, sizeof(uword)); + + ip6_register_protocol (43, sr_local_node.index); + + ip6_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ip6-lookup"); + ASSERT(ip6_lookup_node); + + ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *)"ip6-rewrite"); + ASSERT(ip6_rewrite_node); + + ip6_rewrite_local_node = vlib_get_node_by_name (vm, + (u8 *)"ip6-rewrite-local"); + ASSERT(ip6_rewrite_local_node); + + /* Add a disposition to ip6_lookup for the sr rewrite node */ + sm->ip6_lookup_sr_next_index = + vlib_node_add_next (vm, ip6_lookup_node->index, sr_rewrite_node.index); + + /* Add a disposition to ip6_rewrite for the sr dst address hack node */ + sm->ip6_rewrite_sr_next_index = + vlib_node_add_next (vm, ip6_rewrite_node->index, + sr_fix_dst_addr_node.index); + /* + * Fix ip6-rewrite-local, sibling of the above. 
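+   * Both rewrite nodes must end up with the same next index for
+   * sr-fix-dst-addr (see the ASSERT below).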
The sibling bitmap + * isn't set up at this point, so we have to do it manually + */ + verify_next_index = vlib_node_add_next + (vm, ip6_rewrite_local_node->index, + sr_fix_dst_addr_node.index); + + ASSERT(sm->ip6_rewrite_sr_next_index == verify_next_index); + + OpenSSL_add_all_digests(); + + sm->md = (void *) EVP_get_digestbyname ("sha1"); + sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX)); + + return error; +} + +VLIB_INIT_FUNCTION (sr_init); + +#define foreach_sr_local_next \ + _ (ERROR, "error-drop") \ + _ (IP6_LOOKUP, "ip6-lookup") + +typedef enum { +#define _(s,n) SR_LOCAL_NEXT_##s, + foreach_sr_local_next +#undef _ + SR_LOCAL_N_NEXT, +} sr_local_next_t; + +typedef struct { + u8 next_index; + u8 sr_valid; + ip6_address_t src, dst; + u16 length; + u8 sr[256]; +} sr_local_trace_t; + +static char * sr_local_error_strings[] = { +#define sr_error(n,s) s, +#include "sr_error.def" +#undef sr_error +}; + +typedef enum { +#define sr_error(n,s) SR_LOCAL_ERROR_##n, +#include "sr_error.def" +#undef sr_error + SR_LOCAL_N_ERROR, +} sr_local_error_t; + +u8 * format_sr_local_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_local_trace_t * t = va_arg (*args, sr_local_trace_t *); + + s = format (s, "SR-LOCAL: src %U dst %U len %u next_index %d", + format_ip6_address, &t->src, + format_ip6_address, &t->dst, t->length, t->next_index); + if (t->sr_valid) + s = format (s, "\n %U", format_ip6_sr_header, t->sr, 1 /* print_hmac */); + else + s = format (s, "\n popped SR header"); + + return s; +} + + +/* $$$$ fixme: smp, don't copy data, cache input, output (maybe) */ + +static int sr_validate_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, + ip6_sr_header_t * sr) +{ + u32 key_index; + static u8 * keybuf; + u8 * copy_target; + int first_segment; + ip6_address_t *addrp; + int i; + ip6_sr_hmac_key_t * hmac_key; + static u8 * signature; + u32 sig_len; + + key_index = sr->hmac_key; + + /* No signature? Pass... */ + if (key_index == 0) + return 0; + + /* We don't know about this key? Fail... */ + if (key_index >= vec_len (sm->hmac_keys)) + return 1; + + vec_validate (signature, SHA256_DIGEST_LENGTH-1); + + hmac_key = sm->hmac_keys + key_index; + + vec_reset_length (keybuf); + + /* pkt ip6 src address */ + vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); + memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t)); + + /* last segment */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] = sr->first_segment; + + /* octet w/ bit 0 = "clean" flag */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] + = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)) + ? 
0x80 : 0; + + /* hmac key id */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] = sr->hmac_key; + + first_segment = sr->first_segment; + + addrp = sr->segments; + + /* segments */ + for (i = 0; i <= first_segment; i++) + { + vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); + memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t)); + addrp++; + } + + if (sm->is_debug) + clib_warning ("verify key index %d keybuf: %U", key_index, + format_hex_bytes, keybuf, vec_len(keybuf)); + + /* shared secret */ + + /* SHA1 is shorter than SHA-256 */ + memset (signature, 0, vec_len(signature)); + + HMAC_CTX_init(sm->hmac_ctx); + if (!HMAC_Init(sm->hmac_ctx, hmac_key->shared_secret, + vec_len(hmac_key->shared_secret),sm->md)) + clib_warning ("barf1"); + if (!HMAC_Update(sm->hmac_ctx,keybuf,vec_len(keybuf))) + clib_warning ("barf2"); + if (!HMAC_Final(sm->hmac_ctx,signature,&sig_len)) + clib_warning ("barf3"); + HMAC_CTX_cleanup(sm->hmac_ctx); + + if (sm->is_debug) + clib_warning ("computed signature len %d, value %U", sig_len, + format_hex_bytes, signature, vec_len(signature)); + + /* Point at the SHA signature in the packet */ + addrp++; + if (sm->is_debug) + clib_warning ("read signature %U", format_hex_bytes, addrp, + SHA256_DIGEST_LENGTH); + + return memcmp (signature, addrp, SHA256_DIGEST_LENGTH); +} + +static uword +sr_local (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + ip6_sr_main_t * sm = &sr_main; + u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, + vlib_buffer_t *, ip6_header_t *, + ip6_sr_header_t *); + sr_local_cb = sm->sr_local_cb; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ip6_header_t * ip0, *ip1; + ip6_sr_header_t * sr0, *sr1; + ip6_address_t * new_dst0, * new_dst1; + u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; + u32 next1 = SR_LOCAL_NEXT_IP6_LOOKUP; + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *)(ip0+1); + + if (PREDICT_FALSE(sr0->type != ROUTING_HEADER_TYPE_SR)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; + goto do_trace0; + } + + /* Out of segments? Turf the packet */ + if (PREDICT_FALSE (sr0->segments_left == 0)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; + goto do_trace0; + } + + if (PREDICT_FALSE(sm->validate_hmac)) + { + if (sr_validate_hmac (sm, ip0, sr0)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; + goto do_trace0; + } + } + + next0 = sr_local_cb ? 
sr_local_cb (vm, node, b0, ip0, sr0) : + next0; + + /* + * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx + */ + if (PREDICT_FALSE (next0 & 0x80000000)) + { + next0 ^= 0xFFFFFFFF; + if (PREDICT_FALSE(next0 == SR_LOCAL_NEXT_ERROR)) + b0->error = + node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; + } + else + { + u32 segment_index0; + + segment_index0 = sr0->segments_left - 1; + + /* Rewrite the packet */ + new_dst0 = (ip6_address_t *)(sr0->segments + segment_index0); + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + + if (PREDICT_TRUE (sr0->segments_left > 0)) + { + sr0->segments_left -= 1; + goto do_trace0; + } + } + + /* End of the path. Clean up the SR header, or not */ + if (sr0->flags & clib_host_to_net_u16(IP6_SR_HEADER_FLAG_CLEANUP)) + { + u64 *copy_dst0, *copy_src0; + u16 new_l0; + /* + * Copy the ip6 header right by the (real) length of the + * sr header. Here's another place which assumes that + * the sr header is the only extention header. + */ + + ip0->protocol = sr0->protocol; + vlib_buffer_advance (b0, (sr0->length+1)*8); + + new_l0 = clib_net_to_host_u16(ip0->payload_length) - + (sr0->length+1)*8; + ip0->payload_length = clib_host_to_net_u16(new_l0); + + copy_src0 = (u64 *)ip0; + copy_dst0 = copy_src0 + (sr0->length + 1); + + copy_dst0 [4] = copy_src0[4]; + copy_dst0 [3] = copy_src0[3]; + copy_dst0 [2] = copy_src0[2]; + copy_dst0 [1] = copy_src0[1]; + copy_dst0 [0] = copy_src0[0]; + + sr0 = 0; + } + + do_trace0: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_local_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = vlib_buffer_length_in_chain (vm, b0); + tr->next_index = next0; + tr->sr_valid = sr0 != 0; + if (tr->sr_valid) + memcpy (tr->sr, sr0, sizeof (tr->sr)); + } + + b1 = vlib_get_buffer (vm, bi1); + ip1 = vlib_buffer_get_current (b1); + sr1 = (ip6_sr_header_t *)(ip1+1); + + if (PREDICT_FALSE(sr1->type != ROUTING_HEADER_TYPE_SR)) + { + next1 = SR_LOCAL_NEXT_ERROR; + b1->error = node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; + goto do_trace1; + } + + /* Out of segments? Turf the packet */ + if (PREDICT_FALSE (sr1->segments_left == 0)) + { + next1 = SR_LOCAL_NEXT_ERROR; + b1->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; + goto do_trace1; + } + + if (PREDICT_FALSE(sm->validate_hmac)) + { + if (sr_validate_hmac (sm, ip1, sr1)) + { + next1 = SR_LOCAL_NEXT_ERROR; + b1->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; + goto do_trace1; + } + } + + next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) : + next1; + + /* + * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx + */ + if (PREDICT_FALSE (next1 & 0x80000000)) + { + next1 ^= 0xFFFFFFFF; + if (PREDICT_FALSE(next1 == SR_LOCAL_NEXT_ERROR)) + b1->error = + node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; + } + else + { + u32 segment_index1; + + segment_index1 = sr1->segments_left - 1; + + /* Rewrite the packet */ + new_dst1 = (ip6_address_t *)(sr1->segments + segment_index1); + ip1->dst_address.as_u64[0] = new_dst1->as_u64[0]; + ip1->dst_address.as_u64[1] = new_dst1->as_u64[1]; + + if (PREDICT_TRUE (sr1->segments_left > 0)) + { + sr1->segments_left -= 1; + goto do_trace1; + } + } + + /* End of the path. 
Clean up the SR header, or not */ + if (sr1->flags & clib_host_to_net_u16(IP6_SR_HEADER_FLAG_CLEANUP)) + { + u64 *copy_dst1, *copy_src1; + u16 new_l1; + /* + * Copy the ip6 header right by the (real) length of the + * sr header. Here's another place which assumes that + * the sr header is the only extention header. + */ + + ip1->protocol = sr1->protocol; + vlib_buffer_advance (b1, (sr1->length+1)*8); + + new_l1 = clib_net_to_host_u16(ip1->payload_length) - + (sr1->length+1)*8; + ip1->payload_length = clib_host_to_net_u16(new_l1); + + copy_src1 = (u64 *)ip1; + copy_dst1 = copy_src1 + (sr1->length + 1); + + copy_dst1 [4] = copy_src1[4]; + copy_dst1 [3] = copy_src1[3]; + copy_dst1 [2] = copy_src1[2]; + copy_dst1 [1] = copy_src1[1]; + copy_dst1 [0] = copy_src1[0]; + + sr1 = 0; + } + + do_trace1: + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_local_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = vlib_buffer_length_in_chain (vm, b1); + tr->next_index = next1; + tr->sr_valid = sr1 != 0; + if (tr->sr_valid) + memcpy (tr->sr, sr1, sizeof (tr->sr)); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + ip6_header_t * ip0; + ip6_sr_header_t * sr0; + ip6_address_t * new_dst0; + u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *)(ip0+1); + + if (PREDICT_FALSE(sr0->type != ROUTING_HEADER_TYPE_SR)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; + goto do_trace; + } + + /* Out of segments? Turf the packet */ + if (PREDICT_FALSE (sr0->segments_left == 0)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; + goto do_trace; + } + + if (PREDICT_FALSE(sm->validate_hmac)) + { + if (sr_validate_hmac (sm, ip0, sr0)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; + goto do_trace; + } + } + + next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) : + next0; + + /* + * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx + */ + if (PREDICT_FALSE (next0 & 0x80000000)) + { + next0 ^= 0xFFFFFFFF; + if (PREDICT_FALSE(next0 == SR_LOCAL_NEXT_ERROR)) + b0->error = + node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; + } + else + { + u32 segment_index0; + + segment_index0 = sr0->segments_left - 1; + + /* Rewrite the packet */ + new_dst0 = (ip6_address_t *)(sr0->segments + segment_index0); + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + + if (PREDICT_TRUE (sr0->segments_left > 0)) + { + sr0->segments_left -= 1; + goto do_trace; + } + } + + /* End of the path. Clean up the SR header, or not */ + if (sr0->flags & clib_host_to_net_u16(IP6_SR_HEADER_FLAG_CLEANUP)) + { + u64 *copy_dst0, *copy_src0; + u16 new_l0; + /* + * Copy the ip6 header right by the (real) length of the + * sr header. Here's another place which assumes that + * the sr header is the only extention header. 
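/*
 * Editorial worked example for the clean-up arithmetic used above and
 * below (illustrative numbers).  For an SRH carrying three segments and
 * no policy list or HMAC: the fixed part is 8 bytes and each segment is
 * 16 bytes, so sr->length = (3 * 16) / 8 = 6 and the whole extension
 * header occupies (sr->length + 1) * 8 = 56 bytes.  vlib_buffer_advance()
 * moves current_data forward by those 56 bytes, payload_length shrinks by
 * 56, and the five u64 copies slide the 40-byte ip6 header forward by
 * seven u64s (56 bytes) so that it again sits immediately in front of the
 * payload.
 */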
+ */ + + ip0->protocol = sr0->protocol; + vlib_buffer_advance (b0, (sr0->length+1)*8); + + new_l0 = clib_net_to_host_u16(ip0->payload_length) - + (sr0->length+1)*8; + ip0->payload_length = clib_host_to_net_u16(new_l0); + + copy_src0 = (u64 *)ip0; + copy_dst0 = copy_src0 + (sr0->length + 1); + + copy_dst0 [4] = copy_src0[4]; + copy_dst0 [3] = copy_src0[3]; + copy_dst0 [2] = copy_src0[2]; + copy_dst0 [1] = copy_src0[1]; + copy_dst0 [0] = copy_src0[0]; + + sr0 = 0; + } + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_local_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = vlib_buffer_length_in_chain (vm, b0); + tr->next_index = next0; + tr->sr_valid = sr0 != 0; + if (tr->sr_valid) + memcpy (tr->sr, sr0, sizeof (tr->sr)); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, sr_local_node.index, + SR_LOCAL_ERROR_PKTS_PROCESSED, + from_frame->n_vectors); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (sr_local_node, static) = { + .function = sr_local, + .name = "sr-local", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .format_trace = format_sr_local_trace, + + .runtime_data_bytes = 0, + + .n_errors = SR_LOCAL_N_ERROR, + .error_strings = sr_local_error_strings, + + .n_next_nodes = SR_LOCAL_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_LOCAL_NEXT_##s] = n, + foreach_sr_local_next +#undef _ + }, +}; + +ip6_sr_main_t * sr_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, sr_init); + ASSERT(sr_local_node.index); + return &sr_main; +} + + +static clib_error_t * +set_ip6_sr_rewrite_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_address_t a; + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 fib_index = 0; + u32 fib_id = 0; + u32 adj_index; + uword * p; + ip_adjacency_t * adj; + vnet_hw_interface_t * hi; + u32 sw_if_index; + ip6_sr_main_t * sm = &sr_main; + vnet_main_t * vnm = vnet_get_main(); + + if (!unformat (input, "%U", unformat_ip6_address, &a)) + return clib_error_return (0, "ip6 address missing in '%U'", + format_unformat_error, input); + + if (unformat (input, "rx-table-id %d", &fib_id)) + { + p = hash_get (im->fib_index_by_table_id, fib_id); + if (p == 0) + return clib_error_return (0, "fib-id %d not found"); + fib_index = p[0]; + } + + adj_index = ip6_fib_lookup_with_table (im, fib_index, &a); + + if (adj_index == lm->miss_adj_index) + return clib_error_return (0, "no match for %U", + format_ip6_address, &a); + + adj = ip_get_adjacency (lm, adj_index); + + if (adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) + return clib_error_return (0, "%U unresolved (not a rewrite adj)", + format_ip6_address, &a); + + adj->rewrite_header.next_index = sm->ip6_rewrite_sr_next_index; + + sw_if_index = adj->rewrite_header.sw_if_index; + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + adj->rewrite_header.node_index = sr_fix_dst_addr_node.index; + + /* $$$$$ hack... 
steal the mcast group index */ + adj->mcast_group_index = + vlib_node_add_next (vm, sr_fix_dst_addr_node.index, hi->output_node_index); + + return 0; +} + +VLIB_CLI_COMMAND (set_ip6_sr_rewrite, static) = { + .path = "set ip6 sr rewrite", + .short_help = "set ip6 sr rewrite <ip6-address> [fib-id <id>]", + .function = set_ip6_sr_rewrite_fn, +}; + +void vnet_register_sr_app_callback (void *cb) +{ + ip6_sr_main_t * sm = &sr_main; + + sm->sr_local_cb = cb; +} + +static clib_error_t * +test_sr_hmac_validate_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t * sm = &sr_main; + + if (unformat (input, "validate on")) + sm->validate_hmac = 1; + else if (unformat (input, "validate off")) + sm->validate_hmac = 0; + else + return clib_error_return (0, "expected validate on|off in '%U'", + format_unformat_error, input); + + vlib_cli_output (vm, "hmac signature validation %s", + sm->validate_hmac ? + "on" : "off"); + return 0; +} + +VLIB_CLI_COMMAND (test_sr_hmac_validate, static) = { + .path = "test sr hmac", + .short_help = "test sr hmac validate [on|off]", + .function = test_sr_hmac_validate_fn, +}; + +i32 sr_hmac_add_del_key (ip6_sr_main_t * sm, u32 key_id, u8 * shared_secret, + u8 is_del) +{ + u32 index; + ip6_sr_hmac_key_t * key; + + if (is_del == 0) + { + /* Specific key in use? Fail. */ + if (key_id && vec_len (sm->hmac_keys) > key_id + && sm->hmac_keys[key_id].shared_secret) + return -2; + + index = key_id; + key = find_or_add_shared_secret (sm, shared_secret, &index); + ASSERT(index == key_id); + return 0; + } + + /* delete */ + + if (key_id) /* delete by key ID */ + { + if (vec_len (sm->hmac_keys) <= key_id) + return -3; + + key = sm->hmac_keys + key_id; + + hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret); + vec_free (key->shared_secret); + return 0; + } + + index = 0; + key = find_or_add_shared_secret (sm, shared_secret, &index); + hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret); + vec_free (key->shared_secret); + return 0; +} + + +static clib_error_t * +sr_hmac_add_del_key_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t * sm = &sr_main; + u8 is_del = 0; + u32 key_id = 0; + u8 key_id_set = 0; + u8 * shared_secret = 0; + i32 rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (unformat (input, "id %d", &key_id)) + key_id_set = 1; + else if (unformat (input, "key %s", &shared_secret)) + { + /* Do not include the trailing NULL byte. 
Guaranteed interop issue */ + _vec_len (shared_secret) -= 1; + } + else + break; + } + + if (is_del == 0 && shared_secret == 0) + return clib_error_return (0, "shared secret must be set to add a key"); + + if (shared_secret == 0 && key_id_set == 0) + return clib_error_return (0, "shared secret and key id both unset"); + + rv = sr_hmac_add_del_key (sm, key_id, shared_secret, is_del); + + vec_free (shared_secret); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "sr_hmac_add_del_key returned %d", + rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (sr_hmac, static) = { + .path = "sr hmac", + .short_help = "sr hmac [del] id <nn> key <str>", + .function = sr_hmac_add_del_key_fn, +}; + + +static clib_error_t * +show_sr_hmac_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t * sm = &sr_main; + int i; + + for (i = 1; i < vec_len (sm->hmac_keys); i++) + { + if (sm->hmac_keys[i].shared_secret) + vlib_cli_output (vm, "[%d]: %v", i, sm->hmac_keys[i].shared_secret); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_sr_hmac, static) = { + .path = "show sr hmac", + .short_help = "show sr hmac", + .function = show_sr_hmac_fn, +}; + +static clib_error_t * +test_sr_debug_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t * sm = &sr_main; + + if (unformat (input, "on")) + sm->is_debug = 1; + else if (unformat (input, "off")) + sm->is_debug = 0; + else + return clib_error_return (0, "expected on|off in '%U'", + format_unformat_error, input); + + vlib_cli_output (vm, "debug trace now %s", sm->is_debug ? "on" : "off"); + + return 0; +} + +VLIB_CLI_COMMAND (test_sr_debug, static) = { + .path = "test sr debug", + .short_help = "test sr debug on|off", + .function = test_sr_debug_fn, +}; diff --git a/vnet/vnet/sr/sr.h b/vnet/vnet/sr/sr.h new file mode 100644 index 00000000000..3c6d981d17e --- /dev/null +++ b/vnet/vnet/sr/sr.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
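/*
 * Editorial aside: a minimal programmatic equivalent of the "sr hmac" CLI
 * defined above, calling sr_hmac_add_del_key() directly.  The key id and
 * secret are made up.  format() builds a vector without a trailing NUL,
 * so no trimming is needed here (the CLI path trims the NUL that
 * unformat "%s" appends); freeing the secret afterwards mirrors what the
 * CLI handler does.
 */
static int
my_install_sr_hmac_key (void)
{
  ip6_sr_main_t * sm = &sr_main;
  u8 * secret = format (0, "s3cr3t");
  i32 rv;

  rv = sr_hmac_add_del_key (sm, 13 /* key_id */, secret, 0 /* is_del */);

  vec_free (secret);
  return rv;
}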
+ */ +#ifndef included_vnet_sr_h +#define included_vnet_sr_h + +#include <vnet/vnet.h> +#include <vnet/sr/sr_packet.h> +#include <vnet/ip/ip6_packet.h> + +#include <openssl/opensslconf.h> +#include <stdlib.h> +#include <string.h> + +#include <openssl/crypto.h> +#include <openssl/sha.h> +#include <openssl/opensslv.h> +#include <openssl/hmac.h> + +typedef struct { + ip6_address_t src; + ip6_address_t dst; +} ip6_sr_tunnel_key_t; + +typedef struct { + /* src, dst address */ + ip6_sr_tunnel_key_t key; + + /* mask width for FIB entry */ + u32 dst_mask_width; + + /* first hop, to save 1 elt in the segment list */ + ip6_address_t first_hop; + + /* Fib indices */ + u32 rx_fib_index; + u32 tx_fib_index; + + /* The actual ip6 sr header */ + u8 * rewrite; +} ip6_sr_tunnel_t; + +typedef struct { + u8 * shared_secret; +} ip6_sr_hmac_key_t; + +typedef struct { + /* Key (header imposition case) */ + ip6_address_t *src_address; + ip6_address_t *dst_address; + u32 dst_mask_width; + u32 rx_table_id; + u32 tx_table_id; + + /* segment list, when inserting an ip6 SR header*/ + ip6_address_t *segments; + + /* + * "Tag" list, aka segments inserted at the end of the list, + * past last_seg + */ + ip6_address_t *tags; + + /* Shared secret => generate SHA-256 HMAC security fields */ + u8 * shared_secret; + + /* Flags, e.g. cleanup, policy-list flags */ + u16 flags_net_byte_order; + + /* Delete the tunnnel? */ + u8 is_del; +} ip6_sr_add_del_tunnel_args_t; + +typedef struct { + /* pool of tunnel instances, sr entry only */ + ip6_sr_tunnel_t *tunnels; + + /* find an sr "tunnel" by its outer-IP src/dst */ + uword * tunnel_index_by_key; + + /* ip6-lookup next index for imposition FIB entries */ + u32 ip6_lookup_sr_next_index; + + /* hmac key id by shared secret */ + uword * hmac_key_by_shared_secret; + + /* ip6-rewrite next index for reinstalling the original dst address */ + u32 ip6_rewrite_sr_next_index; + + /* application API callback */ + void *sr_local_cb; + + /* validate hmac keys */ + u8 validate_hmac; + + /* pool of hmac keys */ + ip6_sr_hmac_key_t * hmac_keys; + + /* Openssl vbls */ + EVP_MD * md; + HMAC_CTX * hmac_ctx; + + /* enable debug spew */ + u8 is_debug; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} ip6_sr_main_t; + +ip6_sr_main_t sr_main; + +format_function_t format_ip6_sr_header; +format_function_t format_ip6_sr_header_with_length; + +vlib_node_registration_t ip6_sr_input_node; + +int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a); +void vnet_register_sr_app_callback (void *cb); + +#endif /* included_vnet_sr_h */ diff --git a/vnet/vnet/sr/sr_error.def b/vnet/vnet/sr/sr_error.def new file mode 100644 index 00000000000..62d021fd47b --- /dev/null +++ b/vnet/vnet/sr/sr_error.def @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
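/*
 * Editorial aside: a minimal sketch of filling in
 * ip6_sr_add_del_tunnel_args_t (declared in sr.h above) and calling
 * ip6_sr_add_del_tunnel().  The addresses, mask width, table ids and the
 * single segment are illustrative, and vector ownership/cleanup is
 * elided.
 */
static int
my_add_sr_tunnel (ip6_address_t * src, ip6_address_t * dst,
                  ip6_address_t * seg)
{
  ip6_sr_add_del_tunnel_args_t a;

  memset (&a, 0, sizeof (a));
  a.src_address = src;
  a.dst_address = dst;
  a.dst_mask_width = 128;
  a.rx_table_id = 0;
  a.tx_table_id = 0;
  vec_add1 (a.segments, seg[0]);
  a.flags_net_byte_order = clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP);
  a.is_del = 0;

  return ip6_sr_add_del_tunnel (&a);
}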
+ */ +sr_error (NONE, "no error") +sr_error (BAD_ROUTING_HEADER_TYPE, "bad routing header type (not 4)") +sr_error (NO_MORE_SEGMENTS, "out of SR segment drops") +sr_error (PKTS_PROCESSED, "SR packets processed") +sr_error (APP_CALLBACK, "SR application callback errors") +sr_error (HMAC_INVALID, "SR packets with invalid HMAC signatures") diff --git a/vnet/vnet/sr/sr_fix_dst_error.def b/vnet/vnet/sr/sr_fix_dst_error.def new file mode 100644 index 00000000000..48fe7af6c98 --- /dev/null +++ b/vnet/vnet/sr/sr_fix_dst_error.def @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +sr_fix_dst_error (NONE, "no error") +sr_fix_dst_error (NO_SR_HEADER, "no SR header present") +sr_fix_dst_error (NO_MORE_SEGMENTS, "no more SR segments") diff --git a/vnet/vnet/sr/sr_packet.h b/vnet/vnet/sr/sr_packet.h new file mode 100644 index 00000000000..5604a8dadb7 --- /dev/null +++ b/vnet/vnet/sr/sr_packet.h @@ -0,0 +1,227 @@ +#ifndef included_vnet_sr_packet_h +#define included_vnet_sr_packet_h + +#include <vnet/ip/ip.h> + +/* + * ipv6 segment-routing header format + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * The Segment Routing Header (SRH) is defined as follows: + * + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Next Header | Hdr Ext Len | Routing Type | Segments Left | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | First Segment | Flags | HMAC Key ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[0] (128 bits ipv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | | + * ... 
+ * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[n] (128 bits ipv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Policy List[0] (optional) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Policy List[1] (optional) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Policy List[2] (optional) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | | + * | | + * | HMAC (256 bits) | + * | (optional) | + * | | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * where: + * + * o Next Header: 8-bit selector. Identifies the type of header + * immediately following the SRH. + * + * o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH + * header in 8-octet units, not including the first 8 octets. + * + * o Routing Type: TBD, to be assigned by IANA (suggested value: 4). + * + * o Segments Left. Defined in [RFC2460], it contains the index, in + * the Segment List, of the next segment to inspect. Segments Left + * is decremented at each segment and it is used as an index in the + * segment list. + * + * o First Segment: offset in the SRH, not including the first 8 octets + * and expressed in 16-octet units, pointing to the last element of + * the segment list, which is in fact the first segment of the + * segment routing path. + * + * o Flags: 16 bits of flags. Following flags are defined: + * + * 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |C|P|R|R| Policy Flags | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * C-flag: Clean-up flag. Set when the SRH has to be removed from + * the packet when packet reaches the last segment. + * + * P-flag: Protected flag. Set when the packet has been rerouted + * through FRR mechanism by a SR endpoint node. See Section 6.3 + * for more details. + * + * R-flags. Reserved and for future use. + * + * Policy Flags. Define the type of the IPv6 addresses encoded + * into the Policy List (see below). The following have been + * defined: + * + * Bits 4-6: determine the type of the first element after the + * segment list. + * + * Bits 7-9: determine the type of the second element. + * + * Bits 10-12: determine the type of the third element. + * + * Bits 13-15: determine the type of the fourth element. + * + * The following values are used for the type: + * + * 0x0: Not present. If value is set to 0x0, it means the + * element represented by these bits is not present. + * + * 0x1: SR Ingress. + * + * 0x2: SR Egress. + * + * 0x3: Original Source Address. + * + * o HMAC Key ID and HMAC field, and their use are defined in + * [I-D.vyncke-6man-segment-routing-security]. + * + * o Segment List[n]: 128 bit IPv6 addresses representing the nth + * segment in the Segment List. The Segment List is encoded starting + * from the last segment of the path. I.e., the first element of the + * segment list (Segment List [0]) contains the last segment of the + * path while the last segment of the Segment List (Segment List[n]) + * contains the first segment of the path. The index contained in + * "Segments Left" identifies the current active segment. + * + * o Policy List. 
Optional addresses representing specific nodes in + * the SR path such as: + * + * SR Ingress: a 128 bit generic identifier representing the + * ingress in the SR domain (i.e.: it needs not to be a valid IPv6 + * address). + * + * SR Egress: a 128 bit generic identifier representing the egress + * in the SR domain (i.e.: it needs not to be a valid IPv6 + * address). + * + * Original Source Address: IPv6 address originally present in the + * SA field of the packet. + * + * The segments in the Policy List are encoded after the segment list + * and they are optional. If none are in the SRH, all bits of the + * Policy List Flags MUST be set to 0x0. + */ + +#define ROUTING_HEADER_TYPE_SR 4 + +typedef struct { + /* Protocol for next header. */ + u8 protocol; + /* + * Length of routing header in 8 octet units, + * not including the first 8 octets + */ + u8 length; + + /* Type of routing header; type 4 = segement routing */ + u8 type; + + /* Next segment in the segment list */ + u8 segments_left; + + /* + * Policy list pointer: offset in the SRH of the policy + * list - in 16-octet units - not including the first 8 octets. + */ + u8 first_segment; + + /* Flag bits */ +#define IP6_SR_HEADER_FLAG_CLEANUP (0x8000) +#define IP6_SR_HEADER_FLAG_PROTECTED (0x4000) +#define IP6_SR_HEADER_FLAG_RESERVED (0x3000) + +#define IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT (0x0) +#define IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE (0x1) +#define IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE (0x2) +#define IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR (0x3) + /* values 0x4 - 0x7 are reserved */ + u16 flags; + u8 hmac_key; + + /* The segment + policy list elts */ + ip6_address_t segments[0]; +} __attribute__((packed)) ip6_sr_header_t; + +static inline int +ip6_sr_policy_list_shift_from_index (int pl_index) +{ + return (-3 * pl_index) + 12; +} + +/* pl_index is one-origined, to match the text above */ +static inline int +ip6_sr_policy_list_flags (u16 flags_host_byte_order, int pl_index) +{ + int shift; + + if (pl_index <= 0 || pl_index > 4) + return 0; + + shift = (-3 * pl_index) + 12; + flags_host_byte_order >>= shift; + + return (flags_host_byte_order & 7); +} + +#endif /* included_vnet_sr_packet_h */ diff --git a/vnet/vnet/srp/format.c b/vnet/vnet/srp/format.c new file mode 100644 index 00000000000..a0250cc976f --- /dev/null +++ b/vnet/vnet/srp/format.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * srp_format.c: srp formatting/parsing. 
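/*
 * Editorial worked example for ip6_sr_policy_list_flags() defined in
 * sr_packet.h above (values are illustrative).  With the clean-up flag
 * set and the first policy-list element typed as "Original Source
 * Address", the host-byte-order flags word is
 *
 *   IP6_SR_HEADER_FLAG_CLEANUP | (IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR << 9)
 *     = 0x8000 | 0x0600 = 0x8600
 *
 * and ip6_sr_policy_list_flags (0x8600, 1) evaluates to
 * (0x8600 >> ((-3 * 1) + 12)) & 7 = (0x8600 >> 9) & 7 = 0x3.
 */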
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/srp/srp.h> +#include <vnet/ethernet/ethernet.h> + +static u8 * format_srp_mode (u8 * s, va_list * args) +{ + u32 mode = va_arg (*args, u32); + char * t = 0; + switch (mode) + { +#define _(f) case SRP_MODE_##f: t = #f; break; + foreach_srp_mode +#undef _ + default: t = 0; break; + } + if (t) + s = format (s, "%s", t); + else + s = format (s, "unknown 0x%x", mode); + + return s; +} + +u8 * format_srp_header_with_length (u8 * s, va_list * args) +{ + srp_and_ethernet_header_t * h = va_arg (*args, srp_and_ethernet_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + ethernet_main_t * em = ðernet_main; + uword indent, header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "srp header truncated"); + + indent = format_get_indent (s); + + s = format (s, "mode %U, ring %s, priority %d, ttl %d", + format_srp_mode, h->srp.mode, + h->srp.is_inner_ring ? "inner" : "outer", + h->srp.priority, h->srp.ttl); + + s = format (s, "\n%U%U: %U -> %U", + format_white_space, indent, + format_ethernet_type, clib_net_to_host_u16 (h->ethernet.type), + format_ethernet_address, h->ethernet.src_address, + format_ethernet_address, h->ethernet.dst_address); + + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ethernet_type_info_t * ti; + vlib_node_t * node; + + ti = ethernet_get_type_info (em, h->ethernet.type); + node = ti ? vlib_get_node (em->vlib_main, ti->node_index) : 0; + if (node && node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) h + header_bytes, + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * format_srp_header (u8 * s, va_list * args) +{ + srp_header_t * m = va_arg (*args, srp_header_t *); + return format (s, "%U", format_srp_header_with_length, m, 0); +} + +uword +unformat_srp_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + srp_and_ethernet_header_t * h; + + { + void * p; + vec_add2 (*result, p, sizeof (h[0])); + h = p; + } + + if (! 
unformat (input, "%U: %U -> %U", + unformat_ethernet_type_net_byte_order, &h->ethernet.type, + unformat_ethernet_address, &h->ethernet.src_address, + unformat_ethernet_address, &h->ethernet.dst_address)) + return 0; + + h->srp.mode = SRP_MODE_data; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + u32 x; + + if (unformat (input, "control")) + h->srp.mode = SRP_MODE_control_pass_to_host; + + else if (unformat (input, "pri %d", &x)) + h->srp.priority = x; + + else if (unformat (input, "ttl %d", &x)) + h->srp.ttl = x; + + else + return 0; + } + + return 1; +} diff --git a/vnet/vnet/srp/interface.c b/vnet/vnet/srp/interface.c new file mode 100644 index 00000000000..4c14b747751 --- /dev/null +++ b/vnet/vnet/srp/interface.c @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * srp_interface.c: srp interfaces + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/srp/srp.h> + +static uword srp_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + srp_main_t * sm = &srp_main; + srp_and_ethernet_header_t * h = rewrite; + u16 type; + uword n_bytes = sizeof (h[0]); + + if (n_bytes > max_rewrite_bytes) + return 0; + + switch (l3_type) { +#define _(a,b) case VNET_L3_PACKET_TYPE_##a: type = ETHERNET_TYPE_##b; break + _ (IP4, IP4); + _ (IP6, IP6); + _ (MPLS_UNICAST, MPLS_UNICAST); + _ (MPLS_MULTICAST, MPLS_MULTICAST); + _ (ARP, ARP); +#undef _ + default: + return 0; + } + + memcpy (h->ethernet.src_address, hw->hw_address, sizeof (h->ethernet.src_address)); + if (dst_address) + memcpy (h->ethernet.dst_address, dst_address, sizeof (h->ethernet.dst_address)); + else + memset (h->ethernet.dst_address, ~0, sizeof (h->ethernet.dst_address)); /* broadcast */ + + h->ethernet.type = clib_host_to_net_u16 (type); + + h->srp.as_u16 = 0; + h->srp.mode = SRP_MODE_data; + h->srp.ttl = sm->default_data_ttl; + srp_header_compute_parity (&h->srp); + + return n_bytes; +} + +static void srp_register_interface_helper (u32 * hw_if_indices_by_side, u32 redistribute); + +void serialize_srp_main (serialize_main_t * m, va_list * va) +{ + srp_main_t * sm = &srp_main; + srp_interface_t * si; + + serialize_integer (m, pool_elts (sm->interface_pool), sizeof (u32)); + pool_foreach (si, sm->interface_pool, ({ + serialize_integer (m, si->rings[SRP_RING_OUTER].hw_if_index, sizeof (u32)); + serialize_integer (m, si->rings[SRP_RING_INNER].hw_if_index, sizeof (u32)); + })); +} + +void unserialize_srp_main (serialize_main_t * m, va_list * va) +{ + u32 i, n_ifs, hw_if_indices[SRP_N_RING]; + + unserialize_integer (m, &n_ifs, sizeof (u32)); + for (i = 0; i < n_ifs; i++) + { + unserialize_integer (m, &hw_if_indices[SRP_RING_OUTER], sizeof (u32)); + unserialize_integer (m, &hw_if_indices[SRP_RING_INNER], sizeof (u32)); + srp_register_interface_helper (hw_if_indices, /* redistribute */ 0); + } +} + +static void serialize_srp_register_interface_msg (serialize_main_t * m, va_list * va) +{ + u32 * hw_if_indices = va_arg (*va, u32 *); + serialize_integer (m, hw_if_indices[SRP_SIDE_A], sizeof (hw_if_indices[SRP_SIDE_A])); + serialize_integer (m, hw_if_indices[SRP_SIDE_B], sizeof (hw_if_indices[SRP_SIDE_B])); +} + +static void unserialize_srp_register_interface_msg (serialize_main_t * m, va_list * va) +{ + CLIB_UNUSED (mc_main_t * mcm) = va_arg (*va, mc_main_t *); + u32 hw_if_indices[SRP_N_SIDE]; + srp_main_t * sm = &srp_main; + uword * p; + + unserialize_integer (m, &hw_if_indices[SRP_SIDE_A], sizeof (hw_if_indices[SRP_SIDE_A])); + unserialize_integer (m, &hw_if_indices[SRP_SIDE_B], sizeof (hw_if_indices[SRP_SIDE_B])); + + p = hash_get (sm->srp_register_interface_waiting_process_pool_index_by_hw_if_index, + hw_if_indices[0]); + if (p) + { + vlib_one_time_waiting_process_t * wp = pool_elt_at_index (sm->srp_register_interface_waiting_process_pool, p[0]); + vlib_signal_one_time_waiting_process (mcm->vlib_main, wp); + pool_put (sm->srp_register_interface_waiting_process_pool, wp); + hash_unset (sm->srp_register_interface_waiting_process_pool_index_by_hw_if_index, + hw_if_indices[0]); + } + else + srp_register_interface_helper (hw_if_indices, /* redistribute */ 0); +} + +MC_SERIALIZE_MSG (srp_register_interface_msg, static) = { + .name = "vnet_srp_register_interface", + .serialize = 
serialize_srp_register_interface_msg, + .unserialize = unserialize_srp_register_interface_msg, +}; + +static void srp_register_interface_helper (u32 * hw_if_indices_by_side, u32 redistribute) +{ + vnet_main_t * vnm = vnet_get_main(); + srp_main_t * sm = &srp_main; + vlib_main_t * vm = sm->vlib_main; + srp_interface_t * si; + vnet_hw_interface_t * hws[SRP_N_RING]; + uword s, * p; + + if (vm->mc_main && redistribute) + { + vlib_one_time_waiting_process_t * wp; + mc_serialize (vm->mc_main, &srp_register_interface_msg, hw_if_indices_by_side); + pool_get (sm->srp_register_interface_waiting_process_pool, wp); + hash_set (sm->srp_register_interface_waiting_process_pool_index_by_hw_if_index, + hw_if_indices_by_side[0], + wp - sm->srp_register_interface_waiting_process_pool); + vlib_current_process_wait_for_one_time_event (vm, wp); + } + + /* Check if interface has already been registered. */ + p = hash_get (sm->interface_index_by_hw_if_index, hw_if_indices_by_side[0]); + if (p) + { + si = pool_elt_at_index (sm->interface_pool, p[0]); + } + else + { + pool_get (sm->interface_pool, si); + memset (si, 0, sizeof (si[0])); + } + for (s = 0; s < SRP_N_SIDE; s++) + { + hws[s] = vnet_get_hw_interface (vnm, hw_if_indices_by_side[s]); + si->rings[s].ring = s; + si->rings[s].hw_if_index = hw_if_indices_by_side[s]; + si->rings[s].sw_if_index = hws[s]->sw_if_index; + hash_set (sm->interface_index_by_hw_if_index, hw_if_indices_by_side[s], si - sm->interface_pool); + } + + /* Inherit MAC address from outer ring. */ + memcpy (si->my_address, hws[SRP_RING_OUTER]->hw_address, + vec_len (hws[SRP_RING_OUTER]->hw_address)); + + /* Default time to wait to restore signal. */ + si->config.wait_to_restore_idle_delay = 60; + si->config.ips_tx_interval = 1; +} + +void srp_register_interface (u32 * hw_if_indices_by_side) +{ + srp_register_interface_helper (hw_if_indices_by_side, /* redistribute */ 1); +} + +void srp_interface_set_hw_wrap_function (u32 hw_if_index, srp_hw_wrap_function_t * f) +{ + srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index); + si->hw_wrap_function = f; +} + +void srp_interface_set_hw_enable_function (u32 hw_if_index, srp_hw_enable_function_t * f) +{ + srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index); + si->hw_enable_function = f; +} + +void srp_interface_enable_ips (u32 hw_if_index) +{ + srp_main_t * sm = &srp_main; + srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index); + + si->ips_process_enable = 1; + + vlib_node_set_state (sm->vlib_main, srp_ips_process_node.index, VLIB_NODE_STATE_POLLING); +} + +static uword +srp_is_valid_class_for_interface (vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index) +{ + srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index); + + if (! si) + return 0; + + /* Both sides must be admin down. 
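/*
 * Editorial aside: a minimal sketch of how a device driver might hand an
 * outer/inner ring pair to the SRP layer using srp_register_interface()
 * and srp_interface_enable_ips(), both defined above.  Side A corresponds
 * to the outer ring; the function name and hw_if_index parameters are
 * illustrative.
 */
static void
my_register_srp_ring_pair (u32 outer_hw_if_index, u32 inner_hw_if_index)
{
  u32 hw_if_indices[SRP_N_RING];

  hw_if_indices[SRP_RING_OUTER] = outer_hw_if_index;
  hw_if_indices[SRP_RING_INNER] = inner_hw_if_index;

  srp_register_interface (hw_if_indices);

  /* Optionally start the IPS (protection switching) process for this pair */
  srp_interface_enable_ips (outer_hw_if_index);
}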
*/ + if (vnet_sw_interface_is_admin_up (vnm, si->rings[SRP_RING_OUTER].sw_if_index)) + return 0; + if (vnet_sw_interface_is_admin_up (vnm, si->rings[SRP_RING_INNER].sw_if_index)) + return 0; + + return 1; +} + +static void +srp_interface_hw_class_change (vnet_main_t * vnm, u32 hw_if_index, + u32 old_hw_class_index, u32 new_hw_class_index) +{ + srp_main_t * sm = &srp_main; + srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index); + vnet_hw_interface_t * hi; + vnet_device_class_t * dc; + u32 r, to_srp; + + if (!si) { + clib_warning ("srp interface no set si = 0"); + return; + } + + to_srp = new_hw_class_index == srp_hw_interface_class.index; + + /* Changing class on either outer or inner rings implies changing the class + of the other. */ + for (r = 0; r < SRP_N_RING; r++) + { + srp_interface_ring_t * ir = &si->rings[r]; + + hi = vnet_get_hw_interface (vnm, ir->hw_if_index); + dc = vnet_get_device_class (vnm, hi->dev_class_index); + + /* hw_if_index itself will be handled by caller. */ + if (ir->hw_if_index != hw_if_index) + { + vnet_hw_interface_init_for_class (vnm, ir->hw_if_index, + new_hw_class_index, + to_srp ? si - sm->interface_pool : ~0); + + if (dc->hw_class_change) + dc->hw_class_change (vnm, ir->hw_if_index, new_hw_class_index); + } + else + hi->hw_instance = to_srp ? si - sm->interface_pool : ~0; + } + + if (si->hw_enable_function) + si->hw_enable_function (si, /* enable */ to_srp); +} + +VNET_HW_INTERFACE_CLASS (srp_hw_interface_class) = { + .name = "SRP", + .format_address = format_ethernet_address, + .format_header = format_srp_header_with_length, + .format_device = format_srp_device, + .unformat_hw_address = unformat_ethernet_address, + .unformat_header = unformat_srp_header, + .set_rewrite = srp_set_rewrite, + .is_valid_class_for_interface = srp_is_valid_class_for_interface, + .hw_class_change = srp_interface_hw_class_change, +}; + +static void serialize_srp_interface_config_msg (serialize_main_t * m, va_list * va) +{ + srp_interface_t * si = va_arg (*va, srp_interface_t *); + srp_main_t * sm = &srp_main; + + ASSERT (! 
pool_is_free (sm->interface_pool, si)); + serialize_integer (m, si - sm->interface_pool, sizeof (u32)); + serialize (m, serialize_f64, si->config.wait_to_restore_idle_delay); + serialize (m, serialize_f64, si->config.ips_tx_interval); +} + +static void unserialize_srp_interface_config_msg (serialize_main_t * m, va_list * va) +{ + CLIB_UNUSED (mc_main_t * mcm) = va_arg (*va, mc_main_t *); + srp_main_t * sm = &srp_main; + srp_interface_t * si; + u32 si_index; + + unserialize_integer (m, &si_index, sizeof (u32)); + si = pool_elt_at_index (sm->interface_pool, si_index); + unserialize (m, unserialize_f64, &si->config.wait_to_restore_idle_delay); + unserialize (m, unserialize_f64, &si->config.ips_tx_interval); +} + +MC_SERIALIZE_MSG (srp_interface_config_msg, static) = { + .name = "vnet_srp_interface_config", + .serialize = serialize_srp_interface_config_msg, + .unserialize = unserialize_srp_interface_config_msg, +}; + +void srp_interface_get_interface_config (u32 hw_if_index, srp_interface_config_t * c) +{ + srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index); + ASSERT (si != 0); + c[0] = si->config; +} + +void srp_interface_set_interface_config (u32 hw_if_index, srp_interface_config_t * c) +{ + srp_main_t * sm = &srp_main; + vlib_main_t * vm = sm->vlib_main; + srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index); + ASSERT (si != 0); + if (memcmp (&si->config, &c[0], sizeof (c[0]))) + { + si->config = c[0]; + if (vm->mc_main) + mc_serialize (vm->mc_main, &srp_interface_config_msg, si); + } +} + +#if DEBUG > 0 + +#define VNET_SIMULATED_SRP_TX_NEXT_SRP_INPUT VNET_INTERFACE_TX_N_NEXT + +/* Echo packets back to srp input. */ +static uword +simulated_srp_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, n_left_to_next, n_copy, * from, * to_next; + u32 next_index = VNET_SIMULATED_SRP_TX_NEXT_SRP_INPUT; + u32 i; + vlib_buffer_t * b; + + n_left_from = frame->n_vectors; + from = vlib_frame_args (frame); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + n_copy = clib_min (n_left_from, n_left_to_next); + + memcpy (to_next, from, n_copy * sizeof (from[0])); + n_left_to_next -= n_copy; + n_left_from -= n_copy; + for (i = 0; i < n_copy; i++) + { + b = vlib_get_buffer (vm, from[i]); + /* TX interface will be fake eth; copy to RX for benefit of srp-input. */ + b->sw_if_index[VLIB_RX] = b->sw_if_index[VLIB_TX]; + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return n_left_from; +} + +static u8 * format_simulated_srp_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "fake-srp%d", dev_instance); +} + +VNET_DEVICE_CLASS (srp_simulated_device_class,static) = { + .name = "Simulated srp", + .format_device_name = format_simulated_srp_name, + .tx_function = simulated_srp_interface_tx, +}; + +static clib_error_t * +create_simulated_srp_interfaces (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u8 address[6]; + u32 hw_if_index; + vnet_hw_interface_t * hi; + static u32 instance; + + if (! 
unformat_user (input, unformat_ethernet_address, &address)) + { + memset (address, 0, sizeof (address)); + address[0] = 0xde; + address[1] = 0xad; + address[5] = instance; + } + + hw_if_index = vnet_register_interface (vnm, + srp_simulated_device_class.index, + instance++, + srp_hw_interface_class.index, 0); + + hi = vnet_get_hw_interface (vnm, hw_if_index); + + srp_setup_node (vm, hi->output_node_index); + + hi->min_packet_bytes = 40 + 16; + + /* Standard default ethernet MTU. */ + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1500; + + vec_free (hi->hw_address); + vec_add (hi->hw_address, address, sizeof (address)); + + { + uword slot; + + slot = vlib_node_add_named_next_with_slot + (vm, hi->tx_node_index, + "srp-input", + VNET_SIMULATED_SRP_TX_NEXT_SRP_INPUT); + ASSERT (slot == VNET_SIMULATED_SRP_TX_NEXT_SRP_INPUT); + } + + return /* no error */ 0; +} + +static VLIB_CLI_COMMAND (create_simulated_srp_interface_command) = { + .path = "srp create-interfaces", + .short_help = "Create simulated srp interface", + .function = create_simulated_srp_interfaces, +}; +#endif diff --git a/vnet/vnet/srp/node.c b/vnet/vnet/srp/node.c new file mode 100644 index 00000000000..a44f108326e --- /dev/null +++ b/vnet/vnet/srp/node.c @@ -0,0 +1,929 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * node.c: srp packet processing + * + * Copyright (c) 2011 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/ip/ip_packet.h> /* for ip_csum_fold */ +#include <vnet/srp/srp.h> + +typedef struct { + u8 packet_data[32]; +} srp_input_trace_t; + +static u8 * format_srp_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + srp_input_trace_t * t = va_arg (*va, srp_input_trace_t *); + + s = format (s, "%U", format_srp_header, t->packet_data); + + return s; +} + +typedef enum { + SRP_INPUT_NEXT_ERROR, + SRP_INPUT_NEXT_ETHERNET_INPUT, + SRP_INPUT_NEXT_CONTROL, + SRP_INPUT_N_NEXT, +} srp_input_next_t; + +typedef struct { + u8 next_index; + u8 buffer_advance; + u16 error; +} srp_input_disposition_t; + +static srp_input_disposition_t srp_input_disposition_by_mode[8] = { + [SRP_MODE_reserved0] = { + .next_index = SRP_INPUT_NEXT_ERROR, + .error = SRP_ERROR_UNKNOWN_MODE, + }, + [SRP_MODE_reserved1] = { + .next_index = SRP_INPUT_NEXT_ERROR, + .error = SRP_ERROR_UNKNOWN_MODE, + }, + [SRP_MODE_reserved2] = { + .next_index = SRP_INPUT_NEXT_ERROR, + .error = SRP_ERROR_UNKNOWN_MODE, + }, + [SRP_MODE_reserved3] = { + .next_index = SRP_INPUT_NEXT_ERROR, + .error = SRP_ERROR_UNKNOWN_MODE, + }, + [SRP_MODE_keep_alive] = { + .next_index = SRP_INPUT_NEXT_ERROR, + .error = SRP_ERROR_KEEP_ALIVE_DROPPED, + }, + [SRP_MODE_data] = { + .next_index = SRP_INPUT_NEXT_ETHERNET_INPUT, + .buffer_advance = sizeof (srp_header_t), + }, + [SRP_MODE_control_pass_to_host] = { + .next_index = SRP_INPUT_NEXT_CONTROL, + }, + [SRP_MODE_control_locally_buffered_for_host] = { + .next_index = SRP_INPUT_NEXT_CONTROL, + }, +}; + +static uword +srp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + vnet_main_t * vnm = vnet_get_main(); + srp_main_t * sm = &srp_main; + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (srp_input_trace_t)); + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1, sw_if_index0, sw_if_index1; + vlib_buffer_t * b0, * b1; + u8 next0, next1, error0, error1; + srp_header_t * s0, * s1; + srp_input_disposition_t * d0, * d1; + vnet_hw_interface_t * hi0, * hi1; + srp_interface_t * si0, * si1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * b2, * b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, LOAD); + vlib_prefetch_buffer_header (b3, LOAD); + + CLIB_PREFETCH (b2->data, sizeof (srp_header_t), LOAD); + CLIB_PREFETCH (b3->data, sizeof (srp_header_t), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + s0 = (void *) (b0->data + b0->current_data); + s1 = (void *) (b1->data + b1->current_data); + + /* Data packets are always assigned to side A (outer ring) interface. 
*/ + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + + si0 = pool_elt_at_index (sm->interface_pool, hi0->hw_instance); + si1 = pool_elt_at_index (sm->interface_pool, hi1->hw_instance); + + sw_if_index0 = (s0->mode == SRP_MODE_data + ? si0->rings[SRP_RING_OUTER].sw_if_index + : sw_if_index0); + sw_if_index1 = (s1->mode == SRP_MODE_data + ? si1->rings[SRP_RING_OUTER].sw_if_index + : sw_if_index1); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index1; + + d0 = srp_input_disposition_by_mode + s0->mode; + d1 = srp_input_disposition_by_mode + s1->mode; + + next0 = d0->next_index; + next1 = d1->next_index; + + error0 = d0->error; + error1 = d1->error; + + vlib_buffer_advance (b0, d0->buffer_advance); + vlib_buffer_advance (b1, d1->buffer_advance); + + b0->error = node->errors[error0]; + b1->error = node->errors[error1]; + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, sw_if_index0; + vlib_buffer_t * b0; + u8 next0, error0; + srp_header_t * s0; + srp_input_disposition_t * d0; + srp_interface_t * si0; + vnet_hw_interface_t * hi0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + s0 = (void *) (b0->data + b0->current_data); + + /* Data packets are always assigned to side A (outer ring) interface. */ + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + si0 = pool_elt_at_index (sm->interface_pool, hi0->hw_instance); + + sw_if_index0 = (s0->mode == SRP_MODE_data + ? si0->rings[SRP_RING_OUTER].sw_if_index + : sw_if_index0); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0; + + d0 = srp_input_disposition_by_mode + s0->mode; + + next0 = d0->next_index; + + error0 = d0->error; + + vlib_buffer_advance (b0, d0->buffer_advance); + + b0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static char * srp_error_strings[] = { +#define _(f,s) s, + foreach_srp_error +#undef _ +}; + +VLIB_REGISTER_NODE (srp_input_node,static) = { + .function = srp_input, + .name = "srp-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = SRP_N_ERROR, + .error_strings = srp_error_strings, + + .n_next_nodes = SRP_INPUT_N_NEXT, + .next_nodes = { + [SRP_INPUT_NEXT_ERROR] = "error-drop", + [SRP_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input", + [SRP_INPUT_NEXT_CONTROL] = "srp-control", + }, + + .format_buffer = format_srp_header_with_length, + .format_trace = format_srp_input_trace, + .unformat_buffer = unformat_srp_header, +}; + +static uword +srp_topology_packet (vlib_main_t * vm, u32 sw_if_index, u8 ** contents) +{ + vnet_main_t * vnm = vnet_get_main(); + vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + srp_topology_header_t * t; + srp_topology_mac_binding_t * mb; + u32 nb, nmb; + + t = (void *) *contents; + + nb = clib_net_to_host_u16 (t->n_bytes_of_data_that_follows); + nmb = (nb - sizeof (t->originator_address)) / sizeof (mb[0]); + if (vec_len (*contents) < sizeof (t[0]) + nmb * sizeof (mb[0])) + return SRP_ERROR_TOPOLOGY_BAD_LENGTH; + + /* Fill in our source MAC address. */ + memcpy (t->ethernet.src_address, hi->hw_address, vec_len (hi->hw_address)); + + /* Make space for our MAC binding. */ + vec_resize (*contents, sizeof (srp_topology_mac_binding_t)); + t = (void *) *contents; + t->n_bytes_of_data_that_follows = clib_host_to_net_u16 (nb + sizeof (mb[0])); + + mb = t->bindings + nmb; + + mb->flags = + ((t->srp.is_inner_ring ? SRP_TOPOLOGY_MAC_BINDING_FLAG_IS_INNER_RING : 0) + | (/* is wrapped FIXME */ 0)); + memcpy (mb->address, hi->hw_address, vec_len (hi->hw_address)); + + t->control.checksum + = ~ip_csum_fold (ip_incremental_checksum (0, &t->control, + vec_len (*contents) - STRUCT_OFFSET_OF (srp_generic_control_header_t, control))); + + { + vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index); + vlib_buffer_t * b; + u32 * to_next = vlib_frame_vector_args (f); + u32 bi; + + bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX, + /* buffer to append to */ 0, + *contents, vec_len (*contents)); + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } + + return SRP_ERROR_CONTROL_PACKETS_PROCESSED; +} + +typedef uword (srp_control_handler_function_t) (vlib_main_t * vm, + u32 sw_if_index, + u8 ** contents); + +static uword +srp_control_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + vlib_node_runtime_t * error_node; + static u8 * contents; + + error_node = vlib_node_get_runtime (vm, srp_input_node.index); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (srp_input_trace_t)); + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, l2_len0, l3_len0; + vlib_buffer_t * b0; + u8 next0, error0; + srp_generic_control_header_t * s0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + s0 = (void *) (b0->data + b0->current_data); + l2_len0 = vlib_buffer_length_in_chain (vm, b0); + l3_len0 = l2_len0 - STRUCT_OFFSET_OF 
(srp_generic_control_header_t, control); + + error0 = SRP_ERROR_CONTROL_PACKETS_PROCESSED; + + error0 = s0->control.version != 0 ? SRP_ERROR_CONTROL_VERSION_NON_ZERO : error0; + + { + u16 save0 = s0->control.checksum; + u16 computed0; + s0->control.checksum = 0; + computed0 = ~ip_csum_fold (ip_incremental_checksum (0, &s0->control, l3_len0)); + error0 = save0 != computed0 ? SRP_ERROR_CONTROL_BAD_CHECKSUM : error0; + } + + if (error0 == SRP_ERROR_CONTROL_PACKETS_PROCESSED) + { + static srp_control_handler_function_t * t[SRP_N_CONTROL_PACKET_TYPE] = { + [SRP_CONTROL_PACKET_TYPE_topology] = srp_topology_packet, + }; + srp_control_handler_function_t * f; + + f = 0; + if (s0->control.type < ARRAY_LEN (t)) + f = t[s0->control.type]; + + if (f) + { + vec_validate (contents, l2_len0 - 1); + vlib_buffer_contents (vm, bi0, contents); + error0 = f (vm, vnet_buffer (b0)->sw_if_index[VLIB_RX], &contents); + } + else + error0 = SRP_ERROR_UNKNOWN_CONTROL; + } + + b0->error = error_node->errors[error0]; + next0 = 0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (srp_control_input_node,static) = { + .function = srp_control_input, + .name = "srp-control", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, + + .format_buffer = format_srp_header_with_length, + .format_trace = format_srp_input_trace, + .unformat_buffer = unformat_srp_header, +}; + +static u8 * format_srp_ips_request_type (u8 * s, va_list * args) +{ + u32 x = va_arg (*args, u32); + char * t = 0; + switch (x) + { +#define _(f,n) case SRP_IPS_REQUEST_##f: t = #f; break; + foreach_srp_ips_request_type +#undef _ + default: + return format (s, "unknown 0x%x", x); + } + return format (s, "%U", format_c_identifier, t); +} + +static u8 * format_srp_ips_status (u8 * s, va_list * args) +{ + u32 x = va_arg (*args, u32); + char * t = 0; + switch (x) + { +#define _(f,n) case SRP_IPS_STATUS_##f: t = #f; break; + foreach_srp_ips_status +#undef _ + default: + return format (s, "unknown 0x%x", x); + } + return format (s, "%U", format_c_identifier, t); +} + +static u8 * format_srp_ips_state (u8 * s, va_list * args) +{ + u32 x = va_arg (*args, u32); + char * t = 0; + switch (x) + { +#define _(f) case SRP_IPS_STATE_##f: t = #f; break; + foreach_srp_ips_state +#undef _ + default: + return format (s, "unknown 0x%x", x); + } + return format (s, "%U", format_c_identifier, t); +} + +static u8 * format_srp_ring (u8 * s, va_list * args) +{ + u32 ring = va_arg (*args, u32); + return format (s, "%s", ring == SRP_RING_INNER ? "inner" : "outer"); +} + +static u8 * format_srp_ips_header (u8 * s, va_list * args) +{ + srp_ips_header_t * h = va_arg (*args, srp_ips_header_t *); + + s = format (s, "%U, %U, %U, %s-path", + format_srp_ips_request_type, h->request_type, + format_ethernet_address, h->originator_address, + format_srp_ips_status, h->status, + h->is_long_path ? 
"long" : "short"); + + return s; +} + +static u8 * format_srp_interface (u8 * s, va_list * args) +{ + srp_interface_t * si = va_arg (*args, srp_interface_t *); + srp_interface_ring_t * ir; + + s = format (s, "address %U, IPS state %U", + format_ethernet_address, si->my_address, + format_srp_ips_state, si->current_ips_state); + for (ir = si->rings; ir < si->rings + SRP_N_RING; ir++) + if (ir->rx_neighbor_address_valid) + s = format (s, ", %U neighbor %U", + format_srp_ring, ir->ring, + format_ethernet_address, ir->rx_neighbor_address); + + return s; +} + +u8 * format_srp_device (u8 * s, va_list * args) +{ + u32 hw_if_index = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + vnet_main_t * vnm = vnet_get_main(); + srp_main_t * sm = &srp_main; + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); + srp_interface_t * si = pool_elt_at_index (sm->interface_pool, hi->hw_instance); + return format (s, "%U", format_srp_interface, si); +} + +always_inline srp_interface_t * +srp_get_interface (u32 sw_if_index, srp_ring_type_t * ring) +{ + vnet_main_t * vnm = vnet_get_main(); + srp_main_t * sm = &srp_main; + vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + srp_interface_t * si; + + ASSERT (hi->hw_class_index == srp_hw_interface_class.index); + si = pool_elt_at_index (sm->interface_pool, hi->hw_instance); + + ASSERT (si->rings[SRP_RING_INNER].hw_if_index == hi->hw_if_index + || si->rings[SRP_RING_OUTER].hw_if_index == hi->hw_if_index); + if (ring) + *ring = + (hi->hw_if_index == si->rings[SRP_RING_INNER].hw_if_index + ? SRP_RING_INNER + : SRP_RING_OUTER); + + return si; +} + +static void init_ips_packet (srp_interface_t * si, + srp_ring_type_t tx_ring, + srp_ips_header_t * i) +{ + memset (i, 0, sizeof (i[0])); + + i->srp.ttl = 1; + i->srp.is_inner_ring = tx_ring; + i->srp.priority = 7; + i->srp.mode = SRP_MODE_control_locally_buffered_for_host; + srp_header_compute_parity (&i->srp); + + memcpy (&i->ethernet.src_address, &si->my_address, sizeof (si->my_address)); + i->ethernet.type = clib_host_to_net_u16 (ETHERNET_TYPE_SRP_CONTROL); + + /* Checksum will be filled in later. */ + i->control.version = 0; + i->control.type = SRP_CONTROL_PACKET_TYPE_ips; + i->control.ttl = 255; + + memcpy (&i->originator_address, &si->my_address, sizeof (si->my_address)); +} + +static void tx_ips_packet (srp_interface_t * si, + srp_ring_type_t tx_ring, + srp_ips_header_t * i) +{ + srp_main_t * sm = &srp_main; + vnet_main_t * vnm = vnet_get_main(); + vlib_main_t * vm = sm->vlib_main; + vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, si->rings[tx_ring].hw_if_index); + vlib_frame_t * f; + vlib_buffer_t * b; + u32 * to_next, bi; + + if (! vnet_sw_interface_is_admin_up (vnm, hi->sw_if_index)) + return; + if (hi->hw_class_index != srp_hw_interface_class.index) + return; + + i->control.checksum + = ~ip_csum_fold (ip_incremental_checksum (0, &i->control, + sizeof (i[0]) - STRUCT_OFFSET_OF (srp_ips_header_t, control))); + + bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX, + /* buffer to append to */ 0, + i, sizeof (i[0])); + + /* FIXME trace. 
*/ + if (0) + clib_warning ("%U %U", + format_vnet_sw_if_index_name, vnm, hi->sw_if_index, + format_srp_ips_header, i); + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = hi->sw_if_index; + + f = vlib_get_frame_to_node (vm, hi->output_node_index); + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); +} + +static void serialize_srp_interface_state_msg (serialize_main_t * m, va_list * va) +{ + srp_interface_t * si = va_arg (*va, srp_interface_t *); + srp_main_t * sm = &srp_main; + int r; + + ASSERT (! pool_is_free (sm->interface_pool, si)); + serialize_integer (m, si - sm->interface_pool, sizeof (u32)); + serialize_likely_small_unsigned_integer (m, si->current_ips_state); + for (r = 0; r < SRP_N_RING; r++) + { + srp_interface_ring_t * ir = &si->rings[r]; + void * p; + serialize_likely_small_unsigned_integer (m, ir->rx_neighbor_address_valid); + if (ir->rx_neighbor_address_valid) + { + p = serialize_get (m, sizeof (ir->rx_neighbor_address)); + memcpy (p, ir->rx_neighbor_address, sizeof (ir->rx_neighbor_address)); + } + serialize_likely_small_unsigned_integer (m, ir->waiting_to_restore); + if (ir->waiting_to_restore) + serialize (m, serialize_f64, ir->wait_to_restore_start_time); + } +} + +static void unserialize_srp_interface_state_msg (serialize_main_t * m, va_list * va) +{ + CLIB_UNUSED (mc_main_t * mcm) = va_arg (*va, mc_main_t *); + srp_main_t * sm = &srp_main; + srp_interface_t * si; + u32 si_index, r; + + unserialize_integer (m, &si_index, sizeof (u32)); + si = pool_elt_at_index (sm->interface_pool, si_index); + si->current_ips_state = unserialize_likely_small_unsigned_integer (m); + for (r = 0; r < SRP_N_RING; r++) + { + srp_interface_ring_t * ir = &si->rings[r]; + void * p; + ir->rx_neighbor_address_valid = unserialize_likely_small_unsigned_integer (m); + if (ir->rx_neighbor_address_valid) + { + p = unserialize_get (m, sizeof (ir->rx_neighbor_address)); + memcpy (ir->rx_neighbor_address, p, sizeof (ir->rx_neighbor_address)); + } + ir->waiting_to_restore = unserialize_likely_small_unsigned_integer (m); + if (ir->waiting_to_restore) + unserialize (m, unserialize_f64, &ir->wait_to_restore_start_time); + } +} + +MC_SERIALIZE_MSG (srp_interface_state_msg, static) = { + .name = "vnet_srp_interface_state", + .serialize = serialize_srp_interface_state_msg, + .unserialize = unserialize_srp_interface_state_msg, +}; + +static int requests_switch (srp_ips_request_type_t r) +{ + static u8 t[16] = { + [SRP_IPS_REQUEST_forced_switch] = 1, + [SRP_IPS_REQUEST_manual_switch] = 1, + [SRP_IPS_REQUEST_signal_fail] = 1, + [SRP_IPS_REQUEST_signal_degrade] = 1, + }; + return r < ARRAY_LEN (t) ? t[r] : 0; +} + +/* Called when an IPS control packet is received on given interface. */ +void srp_ips_rx_packet (u32 sw_if_index, srp_ips_header_t * h) +{ + vnet_main_t * vnm = vnet_get_main(); + vlib_main_t * vm = srp_main.vlib_main; + srp_ring_type_t rx_ring; + srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring); + srp_interface_ring_t * ir = &si->rings[rx_ring]; + int si_needs_broadcast = 0; + + /* FIXME trace. */ + if (0) + clib_warning ("%U %U %U", + format_time_interval, "h:m:s:u", vlib_time_now (vm), + format_vnet_sw_if_index_name, vnm, sw_if_index, + format_srp_ips_header, h); + + /* Ignore self-generated IPS packets. */ + if (! 
memcmp (h->originator_address, si->my_address, sizeof (h->originator_address))) + goto done; + + /* Learn neighbor address from short path messages. */ + if (! h->is_long_path) + { + if (ir->rx_neighbor_address_valid + && memcmp (ir->rx_neighbor_address, h->originator_address, sizeof (ir->rx_neighbor_address))) + { + ASSERT (0); + } + ir->rx_neighbor_address_valid = 1; + memcpy (ir->rx_neighbor_address, h->originator_address, sizeof (ir->rx_neighbor_address)); + } + + switch (si->current_ips_state) + { + case SRP_IPS_STATE_idle: + /* Received {REQ,NEIGHBOR,W,S} in idle state: wrap. */ + if (requests_switch (h->request_type) + && ! h->is_long_path + && h->status == SRP_IPS_STATUS_wrapped) + { + srp_ips_header_t to_tx[2]; + + si_needs_broadcast = 1; + si->current_ips_state = SRP_IPS_STATE_wrapped; + si->hw_wrap_function (si->rings[SRP_SIDE_A].hw_if_index, /* enable_wrap */ 1); + si->hw_wrap_function (si->rings[SRP_SIDE_B].hw_if_index, /* enable_wrap */ 1); + + init_ips_packet (si, rx_ring ^ 0, &to_tx[0]); + to_tx[0].request_type = SRP_IPS_REQUEST_idle; + to_tx[0].status = SRP_IPS_STATUS_wrapped; + to_tx[0].is_long_path = 0; + tx_ips_packet (si, rx_ring ^ 0, &to_tx[0]); + + init_ips_packet (si, rx_ring ^ 1, &to_tx[1]); + to_tx[1].request_type = h->request_type; + to_tx[1].status = SRP_IPS_STATUS_wrapped; + to_tx[1].is_long_path = 1; + tx_ips_packet (si, rx_ring ^ 1, &to_tx[1]); + } + break; + + case SRP_IPS_STATE_wrapped: + if (! h->is_long_path + && h->request_type == SRP_IPS_REQUEST_idle + && h->status == SRP_IPS_STATUS_idle) + { + si_needs_broadcast = 1; + si->current_ips_state = SRP_IPS_STATE_idle; + si->hw_wrap_function (si->rings[SRP_SIDE_A].hw_if_index, /* enable_wrap */ 0); + si->hw_wrap_function (si->rings[SRP_SIDE_B].hw_if_index, /* enable_wrap */ 0); + } + break; + + case SRP_IPS_STATE_pass_thru: + /* FIXME */ + break; + + default: + abort (); + break; + } + + done: + if (vm->mc_main && si_needs_broadcast) + mc_serialize (vm->mc_main, &srp_interface_state_msg, si); +} + +/* Preform local IPS request on given interface. */ +void srp_ips_local_request (u32 sw_if_index, srp_ips_request_type_t request) +{ + vnet_main_t * vnm = vnet_get_main(); + srp_main_t * sm = &srp_main; + vlib_main_t * vm = sm->vlib_main; + srp_ring_type_t rx_ring; + srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring); + srp_interface_ring_t * ir = &si->rings[rx_ring]; + int si_needs_broadcast = 0; + + if (request == SRP_IPS_REQUEST_wait_to_restore) + { + if (si->current_ips_state != SRP_IPS_STATE_wrapped) + return; + if (! ir->waiting_to_restore) + { + ir->wait_to_restore_start_time = vlib_time_now (sm->vlib_main); + ir->waiting_to_restore = 1; + si_needs_broadcast = 1; + } + } + else + { + /* FIXME handle local signal fail. */ + si_needs_broadcast = ir->waiting_to_restore; + ir->wait_to_restore_start_time = 0; + ir->waiting_to_restore = 0; + } + + /* FIXME trace. */ + if (0) + clib_warning ("%U %U", + format_vnet_sw_if_index_name, vnm, sw_if_index, + format_srp_ips_request_type, request); + + if (vm->mc_main && si_needs_broadcast) + mc_serialize (vm->mc_main, &srp_interface_state_msg, si); +} + +static void maybe_send_ips_message (srp_interface_t * si) +{ + srp_main_t * sm = &srp_main; + srp_ips_header_t to_tx[2]; + srp_ring_type_t rx_ring = SRP_RING_OUTER; + srp_interface_ring_t * r0 = &si->rings[rx_ring ^ 0]; + srp_interface_ring_t * r1 = &si->rings[rx_ring ^ 1]; + f64 now = vlib_time_now (sm->vlib_main); + + if (! 
si->ips_process_enable) + return; + + if (si->current_ips_state == SRP_IPS_STATE_wrapped + && r0->waiting_to_restore + && r1->waiting_to_restore + && now >= r0->wait_to_restore_start_time + si->config.wait_to_restore_idle_delay + && now >= r1->wait_to_restore_start_time + si->config.wait_to_restore_idle_delay) + { + si->current_ips_state = SRP_IPS_STATE_idle; + r0->waiting_to_restore = r1->waiting_to_restore = 0; + r0->wait_to_restore_start_time = r1->wait_to_restore_start_time = 0; + } + + if (si->current_ips_state != SRP_IPS_STATE_idle) + return; + + init_ips_packet (si, rx_ring ^ 0, &to_tx[0]); + init_ips_packet (si, rx_ring ^ 1, &to_tx[1]); + + if (si->current_ips_state == SRP_IPS_STATE_idle) + { + to_tx[0].request_type = to_tx[1].request_type = SRP_IPS_REQUEST_idle; + to_tx[0].status = to_tx[1].status = SRP_IPS_STATUS_idle; + to_tx[0].is_long_path = to_tx[1].is_long_path = 0; + } + + else if (si->current_ips_state == SRP_IPS_STATE_wrapped) + { + to_tx[0].request_type = + (si->rings[rx_ring ^ 0].waiting_to_restore + ? SRP_IPS_REQUEST_wait_to_restore + : SRP_IPS_REQUEST_signal_fail); + to_tx[1].request_type = + (si->rings[rx_ring ^ 1].waiting_to_restore + ? SRP_IPS_REQUEST_wait_to_restore + : SRP_IPS_REQUEST_signal_fail); + to_tx[0].status = to_tx[1].status = SRP_IPS_STATUS_wrapped; + to_tx[0].is_long_path = 0; + to_tx[1].is_long_path = 1; + } + + tx_ips_packet (si, rx_ring ^ 0, &to_tx[0]); + tx_ips_packet (si, rx_ring ^ 1, &to_tx[1]); +} + +static uword +srp_ips_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + srp_main_t * sm = &srp_main; + srp_interface_t * si; + + while (1) + { + pool_foreach (si, sm->interface_pool, ({ + maybe_send_ips_message (si); + })); + vlib_process_suspend (vm, 1.0); + } + + return 0; +} + +VLIB_REGISTER_NODE (srp_ips_process_node) = { + .function = srp_ips_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "srp-ips-process", + .state = VLIB_NODE_STATE_DISABLED, +}; + +static clib_error_t * srp_init (vlib_main_t * vm) +{ + srp_main_t * sm = &srp_main; + + sm->default_data_ttl = 255; + sm->vlib_main = vm; + srp_setup_node (vm, srp_input_node.index); + + return 0; +} + +VLIB_INIT_FUNCTION (srp_init); diff --git a/vnet/vnet/srp/packet.h b/vnet/vnet/srp/packet.h new file mode 100644 index 00000000000..96dab648b32 --- /dev/null +++ b/vnet/vnet/srp/packet.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * srp/packet.h: srp packet format. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_srp_packet_h +#define included_srp_packet_h + +#include <vppinfra/byte_order.h> +#include <vppinfra/bitops.h> +#include <vnet/ethernet/packet.h> + +/* SRP version 2. */ + +#define foreach_srp_mode \ + _ (reserved0) \ + _ (reserved1) \ + _ (reserved2) \ + _ (reserved3) \ + _ (control_pass_to_host) \ + _ (control_locally_buffered_for_host) \ + _ (keep_alive) \ + _ (data) + +typedef enum { +#define _(f) SRP_MODE_##f, + foreach_srp_mode +#undef _ + SRP_N_MODE, +} srp_mode_t; + +typedef union { + /* For computing parity bit. */ + u16 as_u16; + + struct { + u8 ttl; + +#if CLIB_ARCH_IS_BIG_ENDIAN + u8 is_inner_ring : 1; + u8 mode : 3; + u8 priority : 3; + u8 parity : 1; +#endif +#if CLIB_ARCH_IS_LITTLE_ENDIAN + u8 parity : 1; + u8 priority : 3; + u8 mode : 3; + u8 is_inner_ring : 1; +#endif + }; +} srp_header_t; + +always_inline void +srp_header_compute_parity (srp_header_t * h) +{ + h->parity = 0; + h->parity = count_set_bits (h->as_u16) ^ 1; /* odd parity */ +} + +typedef struct { + srp_header_t srp; + ethernet_header_t ethernet; +} srp_and_ethernet_header_t; + +#define foreach_srp_control_packet_type \ + _ (reserved) \ + _ (topology) \ + _ (ips) + +typedef enum { +#define _(f) SRP_CONTROL_PACKET_TYPE_##f, + foreach_srp_control_packet_type +#undef _ + SRP_N_CONTROL_PACKET_TYPE, +} srp_control_packet_type_t; + +typedef CLIB_PACKED (struct { + /* Set to 0. */ + u8 version; + + srp_control_packet_type_t type : 8; + + /* IP4-like checksum of packet starting with start of control header. */ + u16 checksum; + + u16 ttl; +}) srp_control_header_t; + +typedef struct { + srp_header_t srp; + ethernet_header_t ethernet; + srp_control_header_t control; +} srp_generic_control_header_t; + +typedef struct { + u8 flags; +#define SRP_TOPOLOGY_MAC_BINDING_FLAG_IS_INNER_RING (1 << 6) +#define SRP_TOPOLOGY_MAC_BINDING_FLAG_IS_WRAPPED (1 << 5) + + /* MAC address. */ + u8 address[6]; +} srp_topology_mac_binding_t; + +typedef CLIB_PACKED (struct { + srp_header_t srp; + ethernet_header_t ethernet; + srp_control_header_t control; + + /* Length in bytes of data that follows. */ + u16 n_bytes_of_data_that_follows; + + /* MAC address of originator of this topology request. */ + u8 originator_address[6]; + + /* Bindings follow. 
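+ * Each binding is 7 bytes (one flags byte plus a 6-byte MAC address); + * n_bytes_of_data_that_follows counts originator_address plus all bindings, + * which is how srp_topology_packet () above recovers the binding count as + * (n_bytes - sizeof (originator_address)) / sizeof (binding).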
*/ + srp_topology_mac_binding_t bindings[0]; +}) srp_topology_header_t; + +#define foreach_srp_ips_request_type \ + _ (idle, 0x0) \ + _ (wait_to_restore, 0x5) \ + _ (manual_switch, 0x6) \ + _ (signal_degrade, 0x8) \ + _ (signal_fail, 0xb) \ + _ (forced_switch, 0xd) + +typedef enum { +#define _(f,n) SRP_IPS_REQUEST_##f = n, + foreach_srp_ips_request_type +#undef _ +} srp_ips_request_type_t; + +#define foreach_srp_ips_status \ + _ (idle, 0x0) \ + _ (wrapped, 0x2) + +typedef enum { +#define _(f,n) SRP_IPS_STATUS_##f = n, + foreach_srp_ips_status +#undef _ +} srp_ips_status_t; + +typedef struct { + srp_header_t srp; + ethernet_header_t ethernet; + srp_control_header_t control; + u8 originator_address[6]; + + union { + u8 ips_octet; + + struct { +#if CLIB_ARCH_IS_BIG_ENDIAN + u8 request_type : 4; + u8 is_long_path : 1; + u8 status : 3; +#endif +#if CLIB_ARCH_IS_LITTLE_ENDIAN + u8 status : 3; + u8 is_long_path : 1; + u8 request_type : 4; +#endif + }; + }; + + u8 reserved; +} srp_ips_header_t; + +#endif /* included_srp_packet_h */ diff --git a/vnet/vnet/srp/pg.c b/vnet/vnet/srp/pg.c new file mode 100644 index 00000000000..54f1a3bba18 --- /dev/null +++ b/vnet/vnet/srp/pg.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * srp/pg.c: packet generator srp interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/srp/srp.h> +#include <vnet/ethernet/ethernet.h> + +typedef struct { + pg_edit_t ttl; + pg_edit_t is_inner_ring; + pg_edit_t mode; + pg_edit_t priority; + pg_edit_t parity; + pg_edit_t type; + pg_edit_t src_address; + pg_edit_t dst_address; +} pg_srp_header_t; + +static inline void +pg_srp_header_init (pg_srp_header_t * e) +{ + pg_edit_init (&e->ttl, srp_and_ethernet_header_t, srp.ttl); + pg_edit_init_bitfield (&e->is_inner_ring, srp_and_ethernet_header_t, + srp.as_u16, + 7, 1); + pg_edit_init_bitfield (&e->mode, srp_and_ethernet_header_t, + srp.as_u16, + 4, 3); + pg_edit_init_bitfield (&e->priority, srp_and_ethernet_header_t, + srp.as_u16, + 1, 3); + pg_edit_init_bitfield (&e->parity, srp_and_ethernet_header_t, + srp.as_u16, + 0, 1); + pg_edit_init (&e->type, srp_and_ethernet_header_t, ethernet.type); + pg_edit_init (&e->src_address, srp_and_ethernet_header_t, ethernet.src_address); + pg_edit_init (&e->dst_address, srp_and_ethernet_header_t, ethernet.dst_address); +} + +uword +unformat_pg_srp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_srp_header_t * e; + u32 error, group_index; + + e = pg_create_edit_group (s, sizeof (e[0]), sizeof (srp_header_t), + &group_index); + pg_srp_header_init (e); + + error = 1; + if (! unformat (input, "%U: %U -> %U", + unformat_pg_edit, + unformat_ethernet_type_net_byte_order, &e->type, + unformat_pg_edit, + unformat_ethernet_address, &e->src_address, + unformat_pg_edit, + unformat_ethernet_address, &e->dst_address)) + goto done; + + { + srp_header_t h; + + h.as_u16 = 0; + h.mode = SRP_MODE_data; + h.ttl = 255; + h.parity = count_set_bits (h.as_u16) ^ 1; + + pg_edit_set_fixed (&e->mode, h.mode); + pg_edit_set_fixed (&e->ttl, h.ttl); + pg_edit_set_fixed (&e->is_inner_ring, h.is_inner_ring); + pg_edit_set_fixed (&e->priority, h.priority); + pg_edit_set_fixed (&e->parity, h.parity); + } + + error = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mode %U", + unformat_pg_edit, + unformat_pg_number, &e->mode)) + ; + else if (unformat (input, "ttl %U", + unformat_pg_edit, + unformat_pg_number, &e->ttl)) + ; + else if (unformat (input, "priority %U", + unformat_pg_edit, + unformat_pg_number, &e->priority)) + ; + else + break; + } + + { + ethernet_main_t * em = &ethernet_main; + ethernet_type_info_t * ti = 0; + pg_node_t * pg_node = 0; + + if (e->type.type == PG_EDIT_FIXED) + { + u16 t = *(u16 *) e->type.values[PG_EDIT_LO]; + ti = ethernet_get_type_info (em, clib_net_to_host_u16 (t)); + if (ti && ti->node_index != ~0) + pg_node = pg_get_node (ti->node_index); + } + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + else if (! unformat_user (input, unformat_pg_payload, s)) + goto done; + } + + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/srp/srp.h b/vnet/vnet/srp/srp.h new file mode 100644 index 00000000000..1b241710409 --- /dev/null +++ b/vnet/vnet/srp/srp.h @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * srp.h: types/functions for srp. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_srp_h +#define included_srp_h + +#include <vnet/vnet.h> +#include <vnet/srp/packet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/pg/pg.h> + +extern vnet_hw_interface_class_t srp_hw_interface_class; + +/* See RFC 2892. */ +#define foreach_srp_ips_state \ + _ (idle) \ + _ (pass_thru) \ + _ (wrapped) + +typedef enum { +#define _(f) SRP_IPS_STATE_##f, + foreach_srp_ips_state +#undef _ + SRP_N_IPS_STATE, +} srp_ips_state_t; + +typedef enum { + SRP_RING_OUTER, + SRP_RING_INNER, + SRP_N_RING = 2, + SRP_SIDE_A = SRP_RING_OUTER, /* outer rx, inner tx */ + SRP_SIDE_B = SRP_RING_INNER, /* inner rx, outer tx */ + SRP_N_SIDE = 2, +} srp_ring_type_t; + +typedef struct { + srp_ring_type_t ring; + + /* Hardware interface for this ring/side. */ + u32 hw_if_index; + + /* Software interface corresponding to hardware interface. */ + u32 sw_if_index; + + /* Mac address of neighbor on RX fiber. */ + u8 rx_neighbor_address[6]; + + u8 rx_neighbor_address_valid; + + /* True if we are waiting to restore signal. */ + u8 waiting_to_restore; + + /* Time stamp when signal became valid. */ + f64 wait_to_restore_start_time; +} srp_interface_ring_t; + +struct srp_interface_t; +typedef void (srp_hw_wrap_function_t) (u32 hw_if_index, u32 wrap_enable); +typedef void (srp_hw_enable_function_t) (struct srp_interface_t * si, u32 wrap_enable); + +typedef struct { + /* Delay between wait to restore event and entering idle state in seconds. */ + f64 wait_to_restore_idle_delay; + + /* Number of seconds between sending ips messages to neighbors. */ + f64 ips_tx_interval; +} srp_interface_config_t; + +typedef struct srp_interface_t { + /* Current IPS state. */ + srp_ips_state_t current_ips_state; + + /* Address for this interface. */ + u8 my_address[6]; + + /* Enable IPS process handling for this interface. */ + u8 ips_process_enable; + + srp_interface_ring_t rings[SRP_N_RING]; + + /* Configurable parameters. 
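+ * wait_to_restore_idle_delay gates the wrapped -> idle transition checked in + * maybe_send_ips_message (); ips_tx_interval records the intended IPS transmit + * period, although srp_ips_process () currently suspends for a fixed 1.0 second.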
*/ + srp_interface_config_t config; + + srp_hw_wrap_function_t * hw_wrap_function; + + srp_hw_enable_function_t * hw_enable_function; +} srp_interface_t; + +typedef struct { + vlib_main_t * vlib_main; + + /* Pool of SRP interfaces. */ + srp_interface_t * interface_pool; + + uword * interface_index_by_hw_if_index; + + /* TTL to use for outgoing data packets. */ + u32 default_data_ttl; + + vlib_one_time_waiting_process_t * srp_register_interface_waiting_process_pool; + + uword * srp_register_interface_waiting_process_pool_index_by_hw_if_index; +} srp_main_t; + +/* Registers sides A/B hardware interface as being SRP capable. */ +void srp_register_interface (u32 * hw_if_indices); + +/* Enable sending IPS messages for interface implied by given vlib hardware interface. */ +void srp_interface_enable_ips (u32 hw_if_index); + +/* Set function to wrap hardware side of SRP interface. */ +void srp_interface_set_hw_wrap_function (u32 hw_if_index, srp_hw_wrap_function_t * f); + +void srp_interface_set_hw_enable_function (u32 hw_if_index, srp_hw_enable_function_t * f); + +vlib_node_registration_t srp_ips_process_node; + +/* Called when an IPS control packet is received on given interface. */ +void srp_ips_rx_packet (u32 sw_if_index, srp_ips_header_t * ips_packet); + +/* Preform local IPS request on given interface. */ +void srp_ips_local_request (u32 sw_if_index, srp_ips_request_type_t request); + +always_inline void +srp_ips_link_change (u32 sw_if_index, u32 link_is_up) +{ + srp_ips_local_request (sw_if_index, + link_is_up + ? SRP_IPS_REQUEST_wait_to_restore + : SRP_IPS_REQUEST_signal_fail); +} + +void srp_interface_get_interface_config (u32 hw_if_index, srp_interface_config_t * c); +void srp_interface_set_interface_config (u32 hw_if_index, srp_interface_config_t * c); + +srp_main_t srp_main; + +always_inline srp_interface_t * +srp_get_interface_from_vnet_hw_interface (u32 hw_if_index) +{ + srp_main_t * sm = &srp_main; + uword * p = hash_get (sm->interface_index_by_hw_if_index, hw_if_index); + return p ? pool_elt_at_index (sm->interface_pool, p[0]) : 0; +} + +u8 * format_srp_header (u8 * s, va_list * args); +u8 * format_srp_header_with_length (u8 * s, va_list * args); +u8 * format_srp_device (u8 * s, va_list * args); + +/* Parse srp header. 
*/ +uword +unformat_srp_header (unformat_input_t * input, va_list * args); + +uword unformat_pg_srp_header (unformat_input_t * input, va_list * args); + +always_inline void +srp_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + pg_node_t * pn = pg_get_node (node_index); + n->format_buffer = format_srp_header_with_length; + n->unformat_buffer = unformat_srp_header; + pn->unformat_edit = unformat_pg_srp_header; +} + +#define foreach_srp_error \ + _ (NONE, "no error") \ + _ (UNKNOWN_MODE, "unknown mode in SRP header") \ + _ (KEEP_ALIVE_DROPPED, "v1 keep alive mode in SRP header") \ + _ (CONTROL_PACKETS_PROCESSED, "control packets processed") \ + _ (IPS_PACKETS_PROCESSED, "IPS packets processed") \ + _ (UNKNOWN_CONTROL, "unknown control packet") \ + _ (CONTROL_VERSION_NON_ZERO, "control packet with non-zero version") \ + _ (CONTROL_BAD_CHECKSUM, "control packet with bad checksum") \ + _ (TOPOLOGY_BAD_LENGTH, "topology packet with bad length") + +typedef enum { +#define _(n,s) SRP_ERROR_##n, + foreach_srp_error +#undef _ + SRP_N_ERROR, +} srp_error_t; + +serialize_function_t serialize_srp_main, unserialize_srp_main; + +#endif /* included_srp_h */ diff --git a/vnet/vnet/unix/gdb_funcs.c b/vnet/vnet/unix/gdb_funcs.c new file mode 100644 index 00000000000..9a2e4599a2b --- /dev/null +++ b/vnet/vnet/unix/gdb_funcs.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vppinfra/format.h> +#include <vlib/vlib.h> + +#include <vlib/threads.h> + +/* Functions to call from gdb */ + +u32 vl(void *p) +{ + return vec_len (p); +} + +uword pe (void *v) +{ + return (pool_elts(v)); +} + +int pifi (void *p, u32 index) +{ + return pool_is_free_index (p, index); +} + +void debug_hex_bytes (u8 *s, u32 n) +{ + fformat (stderr, "%U\n", format_hex_bytes, s, n); +} + +void vlib_dump_frame_ownership (void) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_node_main_t * nm = &vm->node_main; + vlib_node_runtime_t * this_node_runtime; + vlib_next_frame_t * nf; + u32 first_nf_index; + u32 index; + + vec_foreach(this_node_runtime, nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) + { + first_nf_index = this_node_runtime->next_frame_index; + + for (index = first_nf_index; index < first_nf_index + + this_node_runtime->n_next_nodes; index++) + { + vlib_node_runtime_t * owned_runtime; + nf = vec_elt_at_index (vm->node_main.next_frames, index); + if (nf->flags & VLIB_FRAME_OWNER) + { + owned_runtime = vec_elt_at_index (nm->nodes_by_type[0], + nf->node_runtime_index); + fformat(stderr, + "%s next index %d owns enqueue rights to %s\n", + nm->nodes[this_node_runtime->node_index]->name, + index - first_nf_index, + nm->nodes[owned_runtime->node_index]->name); + fformat (stderr, " nf index %d nf->frame_index %d\n", + nf - vm->node_main.next_frames, + nf->frame_index); + } + } + } +} + +void vlib_runtime_index_to_node_name (u32 index) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_node_main_t * nm = &vm->node_main; + + if (index > vec_len (nm->nodes)) + { + fformat(stderr, "%d out of range, max %d\n", vec_len(nm->nodes)); + return; + } + + fformat(stderr, "node runtime index %d name %s\n", index, nm->nodes[index]->name); +} + + +static clib_error_t * +show_gdb_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_cli_output (vm, "vl(p) returns vec_len(p)"); + vlib_cli_output (vm, "pe(p) returns pool_elts(p)"); + vlib_cli_output (vm, "pifi(p, i) returns pool_is_free_index(p, i)"); + vlib_cli_output (vm, "debug_hex_bytes (ptr, n_bytes) dumps n_bytes in hex"); + vlib_cli_output (vm, "vlib_dump_frame_ownership() does what it says"); + vlib_cli_output (vm, "vlib_runtime_index_to_node_name (index) prints NN"); + + return 0; +} + +VLIB_CLI_COMMAND (show_gdb_funcs_command, static) = { + .path = "show gdb", + .short_help = "Describe functions which can be called from gdb", + .function = show_gdb_command_fn, +}; + +/* Cafeteria plan, maybe you don't want these functions */ +clib_error_t * +gdb_func_init (vlib_main_t * vm) { return 0; } + +VLIB_INIT_FUNCTION (gdb_func_init); diff --git a/vnet/vnet/unix/pcap.c b/vnet/vnet/unix/pcap.c new file mode 100644 index 00000000000..16b8443085b --- /dev/null +++ b/vnet/vnet/unix/pcap.c @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * pcap.c: libpcap packet capture format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/unix/pcap.h> +#include <sys/fcntl.h> + +/* Usage + +#include <vnet/unix/pcap.h> + +static pcap_main_t pcap = { + .file_name = "/tmp/ip4", + .n_packets_to_capture = 2, + .packet_type = PCAP_PACKET_TYPE_ip, +}; + +To add a buffer: + + pcap_add_buffer (&pcap, vm, pi0, 128); + +file will be written after n_packets_to_capture or call to pcap_write (&pcap). + +*/ + +clib_error_t * +pcap_write (pcap_main_t * pm) +{ + clib_error_t * error = 0; + + if (! (pm->flags & PCAP_MAIN_INIT_DONE)) + { + pcap_file_header_t fh; + int n; + + if (! pm->file_name) + pm->file_name = "/tmp/vnet.pcap"; + + pm->file_descriptor = open (pm->file_name, O_CREAT | O_TRUNC | O_WRONLY, 0664); + if (pm->file_descriptor < 0) + { + error = clib_error_return_unix (0, "failed to open `%s'", pm->file_name); + goto done; + } + + pm->flags |= PCAP_MAIN_INIT_DONE; + pm->n_packets_captured = 0; + pm->n_pcap_data_written = 0; + + /* Write file header. 
*/ + memset (&fh, 0, sizeof (fh)); + fh.magic = 0xa1b2c3d4; + fh.major_version = 2; + fh.minor_version = 4; + fh.time_zone = 0; + fh.max_packet_size_in_bytes = 1 << 16; + fh.packet_type = pm->packet_type; + n = write (pm->file_descriptor, &fh, sizeof (fh)); + if (n != sizeof (fh)) + { + if (n < 0) + error = clib_error_return_unix (0, "write file header `%s'", pm->file_name); + else + error = clib_error_return (0, "short write of file header `%s'", pm->file_name); + goto done; + } + } + + do { + int n = vec_len (pm->pcap_data) - pm->n_pcap_data_written; + + if (n > 0) + { + n = write (pm->file_descriptor, + vec_elt_at_index (pm->pcap_data, pm->n_pcap_data_written), + n); + if (n < 0 && unix_error_is_fatal (errno)) + { + error = clib_error_return_unix (0, "write `%s'", pm->file_name); + goto done; + } + } + pm->n_pcap_data_written += n; + if (pm->n_pcap_data_written >= vec_len (pm->pcap_data)) + { + vec_reset_length (pm->pcap_data); + break; + } + } while (pm->n_packets_captured >= pm->n_packets_to_capture); + + if (pm->n_packets_captured >= pm->n_packets_to_capture) + { + close (pm->file_descriptor); + pm->flags &= ~PCAP_MAIN_INIT_DONE; + pm->file_descriptor = -1; + } + + done: + if (error) + { + if (pm->file_descriptor >= 0) + close (pm->file_descriptor); + } + return error; +} + +clib_error_t * pcap_read (pcap_main_t * pm) +{ + clib_error_t * error = 0; + int fd, need_swap, n; + pcap_file_header_t fh; + pcap_packet_header_t ph; + + fd = open (pm->file_name, O_RDONLY); + if (fd < 0) + { + error = clib_error_return_unix (0, "open `%s'", pm->file_name); + goto done; + } + + if (read (fd, &fh, sizeof (fh)) != sizeof (fh)) + { + error = clib_error_return_unix (0, "read file header `%s'", pm->file_name); + goto done; + } + + need_swap = 0; + if (fh.magic == 0xd4c3b2a1) + { + need_swap = 1; +#define _(t,f) fh.f = clib_byte_swap_##t (fh.f); + foreach_pcap_file_header; +#undef _ + } + + if (fh.magic != 0xa1b2c3d4) + { + error = clib_error_return (0, "bad magic `%s'", pm->file_name); + goto done; + } + + pm->min_packet_bytes = 0; + pm->max_packet_bytes = 0; + while ((n = read (fd, &ph, sizeof (ph))) != 0) + { + u8 * data; + + if (need_swap) + { +#define _(t,f) ph.f = clib_byte_swap_##t (ph.f); + foreach_pcap_packet_header; +#undef _ + } + + data = vec_new (u8, ph.n_bytes_in_packet); + if (read (fd, data, ph.n_packet_bytes_stored_in_file) != ph.n_packet_bytes_stored_in_file) + { + error = clib_error_return (0, "short read `%s'", pm->file_name); + goto done; + } + + if (vec_len (pm->packets_read) == 0) + pm->min_packet_bytes = pm->max_packet_bytes = ph.n_bytes_in_packet; + else + { + pm->min_packet_bytes = clib_min (pm->min_packet_bytes, ph.n_bytes_in_packet); + pm->max_packet_bytes = clib_max (pm->max_packet_bytes, ph.n_bytes_in_packet); + } + + vec_add1 (pm->packets_read, data); + } + + done: + if (fd >= 0) + close (fd); + return error; + +} diff --git a/vnet/vnet/unix/pcap.h b/vnet/vnet/unix/pcap.h new file mode 100644 index 00000000000..6e8e69191f5 --- /dev/null +++ b/vnet/vnet/unix/pcap.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * pcap.h: libpcap packet capture format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_pcap_h +#define included_vnet_pcap_h + +#include <vlib/vlib.h> + +#define foreach_vnet_pcap_packet_type \ + _ (null, 0) \ + _ (ethernet, 1) \ + _ (ppp, 9) \ + _ (ip, 12) \ + _ (hdlc, 104) + +typedef enum { +#define _(f,n) PCAP_PACKET_TYPE_##f = (n), + foreach_vnet_pcap_packet_type +#undef _ +} pcap_packet_type_t; + +#define foreach_pcap_file_header \ + /* 0xa1b2c3d4 host byte order. \ + 0xd4c3b2a1 => need to byte swap everything. */ \ + _ (u32, magic) \ + \ + /* Currently major 2 minor 4. */ \ + _ (u16, major_version) \ + _ (u16, minor_version) \ + \ + /* 0 for GMT. */ \ + _ (u32, time_zone) \ + \ + /* Accuracy of timestamps. Typically set to 0. */ \ + _ (u32, sigfigs) \ + \ + /* Size of largest packet in file. */ \ + _ (u32, max_packet_size_in_bytes) \ + \ + /* One of vnet_pcap_packet_type_t. */ \ + _ (u32, packet_type) + +/* File header. */ +typedef struct { +#define _(t, f) t f; + foreach_pcap_file_header +#undef _ +} pcap_file_header_t; + +#define foreach_pcap_packet_header \ + /* Time stamp in seconds and microseconds. */ \ + _ (u32, time_in_sec) \ + _ (u32, time_in_usec) \ + \ + /* Number of bytes stored in file and size of actual packet. */ \ + _ (u32, n_packet_bytes_stored_in_file) \ + _ (u32, n_bytes_in_packet) + +/* Packet header. */ +typedef struct { +#define _(t, f) t f; + foreach_pcap_packet_header +#undef _ + + /* Packet data follows. */ + u8 data[0]; +} pcap_packet_header_t; + +typedef struct { + /* File name of pcap output. */ + char * file_name; + + /* Number of packets to capture. */ + u32 n_packets_to_capture; + + pcap_packet_type_t packet_type; + + /* Number of packets currently captured. */ + u32 n_packets_captured; + + u32 flags; +#define PCAP_MAIN_INIT_DONE (1 << 0) + + /* File descriptor for reading/writing. */ + int file_descriptor; + + u32 n_pcap_data_written; + + /* Vector of pcap data. */ + u8 * pcap_data; + + /* Packets read from file. 
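+ * pcap_read () fills this as a vector of byte-vectors, one per packet, and + * tracks min_packet_bytes / max_packet_bytes alongside it; pcap2pg walks + * packets_read to emit one packet-generator stanza per captured packet.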
*/ + u8 ** packets_read; + + u32 min_packet_bytes, max_packet_bytes; +} pcap_main_t; + +/* Write out data to output file. */ +clib_error_t * pcap_write (pcap_main_t * pm); + +clib_error_t * pcap_read (pcap_main_t * pm); + +static inline void * +pcap_add_packet (pcap_main_t * pm, + f64 time_now, + u32 n_bytes_in_trace, + u32 n_bytes_in_packet) +{ + pcap_packet_header_t * h; + u8 * d; + + vec_add2 (pm->pcap_data, d, sizeof (h[0]) + n_bytes_in_trace); + h = (void *) (d); + h->time_in_sec = time_now; + h->time_in_usec = 1e6*(time_now - h->time_in_sec); + h->n_packet_bytes_stored_in_file = n_bytes_in_trace; + h->n_bytes_in_packet = n_bytes_in_packet; + pm->n_packets_captured++; + return h->data; +} + +static inline void +pcap_add_buffer (pcap_main_t * pm, + vlib_main_t * vm, u32 buffer_index, + u32 n_bytes_in_trace) +{ + vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index); + u32 n = vlib_buffer_length_in_chain (vm, b); + i32 n_left = clib_min (n_bytes_in_trace, n); + f64 time_now = vlib_time_now (vm); + void * d; + + d = pcap_add_packet (pm, time_now, n_bytes_in_trace, n_left); + while (1) + { + memcpy (d, b->data + b->current_data, b->current_length); + n_left -= b->current_length; + if (n_left <= 0) + break; + d += b->current_length; + ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT); + b = vlib_get_buffer (vm, b->next_buffer); + } + + /* Flush output vector. */ + if (vec_len (pm->pcap_data) >= 64*1024 + || pm->n_packets_captured >= pm->n_packets_to_capture) + pcap_write (pm); +} + +#endif /* included_vnet_pcap_h */ diff --git a/vnet/vnet/unix/pcap2pg.c b/vnet/vnet/unix/pcap2pg.c new file mode 100644 index 00000000000..10b47c976dc --- /dev/null +++ b/vnet/vnet/unix/pcap2pg.c @@ -0,0 +1,155 @@ +/* + * pcap2pg.c: convert pcap input to pg input + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/unix/pcap.h> +#include <vnet/ethernet/packet.h> +#include <stdio.h> + +pcap_main_t pcap_main; + +static char * pg_fmt = + "packet-generator new {\n" + " name s%d\n" + " limit 1\n" + " size %d-%d\n" + " node ethernet-input\n"; + + +void stream_boilerplate (FILE *ofp, int i, u8 * pkt) +{ + fformat(ofp, pg_fmt, i, vec_len(pkt), vec_len(pkt)); +} + +int pcap2pg (pcap_main_t * pm, FILE *ofp) +{ + int i, j; + u8 *pkt; + + for (i = 0; i < vec_len (pm->packets_read); i++) + { + int offset; + ethernet_header_t * h; + u64 ethertype; + + pkt = pm->packets_read[i]; + h = (ethernet_header_t *)pkt; + + stream_boilerplate (ofp, i, pkt); + + fformat (ofp, " data {\n"); + + ethertype = clib_net_to_host_u16 (h->type); + + /* + * In vnet terms, packet generator interfaces are not ethernets. + * They don't have vlan tables. + * This dance transforms captured 802.1q VLAN packets into + * regular Ethernet packets. 
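+ * Concretely: for an untagged frame the payload starts at byte 14 + * (6 dst + 6 src + 2 type); for an 802.1q frame the real ethertype sits + * beyond the 4-byte tag, so the payload starts at byte 18.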
*/ + if (ethertype == 0x8100 /* 802.1q vlan */) + { + u16 * vlan_ethertype = (u16 *)(h+1); + ethertype = clib_net_to_host_u16(vlan_ethertype[1]); + offset = 18; + } + else + offset = 14; + + fformat (ofp, + " 0x%04x: %02x%02x.%02x%02x.%02x%02x" + " -> %02x%02x.%02x%02x.%02x%02x\n", + ethertype, + h->src_address[0], + h->src_address[1], + h->src_address[2], + h->src_address[3], + h->src_address[4], + h->src_address[5], + h->dst_address[0], + h->dst_address[1], + h->dst_address[2], + h->dst_address[3], + h->dst_address[4], + h->dst_address[5]); + + fformat (ofp, " hex 0x"); + + for (j = offset; j < vec_len (pkt); j++) + fformat (ofp, "%02x", pkt[j]); + + fformat (ofp, " }\n"); + fformat (ofp, "}\n\n"); + } + return 0; +} + +int main (int argc, char **argv) +{ + unformat_input_t input; + pcap_main_t * pm = &pcap_main; + u8 * input_file = 0, * output_file = 0; + FILE * ofp; + clib_error_t * error; + + unformat_init_command_line (&input, argv); + + while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(&input, "-i %s", &input_file) + || unformat (&input, "input %s", &input_file)) + ; + else if (unformat (&input, "-o %s", &output_file) + || unformat (&input, "output %s", &output_file)) + ; + else + { + usage: + fformat(stderr, + "usage: pcap2pg -i <input-file> [-o <output-file>]\n"); + exit (1); + } + } + + if (input_file == 0) + goto usage; + + pm->file_name = (char *)input_file; + error = pcap_read (pm); + + if (error) + { + clib_error_report (error); + exit (1); + } + + if (output_file) + { + ofp = fopen ((char *)output_file, "w"); + if (ofp == NULL) { + clib_unix_warning ("Couldn't create '%s'", output_file); + exit (1); } + } + else + { + ofp = stdout; + } + + pcap2pg (pm, ofp); + + fclose (ofp); + exit (0); +} diff --git a/vnet/vnet/unix/tapcli.c b/vnet/vnet/unix/tapcli.c new file mode 100644 index 00000000000..44af321f796 --- /dev/null +++ b/vnet/vnet/unix/tapcli.c @@ -0,0 +1,1200 @@ +/* + *------------------------------------------------------------------ + * tapcli.c - dynamic tap interface hookup + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ *------------------------------------------------------------------ + */ + +#include <fcntl.h> /* for open */ +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/uio.h> /* for iovec */ +#include <netinet/in.h> + +#include <linux/if_arp.h> +#include <linux/if_tun.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <vnet/ip/ip.h> + +#include <vnet/ethernet/ethernet.h> + +#if DPDK == 1 +#include <vnet/devices/dpdk/dpdk.h> +#endif + +#include <vnet/unix/tapcli.h> + +static vnet_device_class_t tapcli_dev_class; +static vnet_hw_interface_class_t tapcli_interface_class; + +static void tapcli_nopunt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); +typedef struct { + u32 unix_fd; + u32 unix_file_index; + u32 provision_fd; + u32 sw_if_index; /* for counters */ + u32 hw_if_index; + u32 is_promisc; + struct ifreq ifr; + u32 per_interface_next_index; + u8 active; /* for delete */ +} tapcli_interface_t; + +typedef struct { + /* Vector of iovecs for readv/writev calls. */ + struct iovec * iovecs; + + /* Vector of VLIB rx buffers to use. We allocate them in blocks + of VLIB_FRAME_SIZE (256). */ + u32 * rx_buffers; + + /* tap device destination MAC address. Required, or Linux drops pkts */ + u8 ether_dst_mac[6]; + + /* Interface MTU in bytes and # of default sized buffers. */ + u32 mtu_bytes, mtu_buffers; + + /* Vector of tap interfaces */ + tapcli_interface_t * tapcli_interfaces; + + /* Vector of deleted tap interfaces */ + u32 * tapcli_inactive_interfaces; + + /* Bitmap of tap interfaces with pending reads */ + uword * pending_read_bitmap; + + /* Hash table to find tapcli interface given hw_if_index */ + uword * tapcli_interface_index_by_sw_if_index; + + /* Hash table to find tapcli interface given unix fd */ + uword * tapcli_interface_index_by_unix_fd; + + /* renumbering table */ + u32 * show_dev_instance_by_real_dev_instance; + + /* 1 => disable CLI */ + int is_disabled; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + unix_main_t * unix_main; +} tapcli_main_t; + +static tapcli_main_t tapcli_main; + +/* + * tapcli_tx + * Output node, writes the buffers comprising the incoming frame + * to the tun/tap device, aka hands them to the Linux kernel stack. + * + */ +static uword +tapcli_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t * ti; + int i; + + for (i = 0; i < n_packets; i++) + { + struct iovec * iov; + vlib_buffer_t * b; + uword l; + vnet_hw_interface_t * hw; + uword * p; + u32 tx_sw_if_index; + + b = vlib_get_buffer (vm, buffers[i]); + + tx_sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_TX]; + if (tx_sw_if_index == (u32)~0) + tx_sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX]; + + ASSERT(tx_sw_if_index != (u32)~0); + + /* Use the sup intfc to finesse vlan subifs */ + hw = vnet_get_sup_hw_interface (tm->vnet_main, tx_sw_if_index); + tx_sw_if_index = hw->sw_if_index; + + p = hash_get (tm->tapcli_interface_index_by_sw_if_index, + tx_sw_if_index); + if (p == 0) + { + clib_warning ("sw_if_index %d unknown", tx_sw_if_index); + /* $$$ leak, but this should never happen... */ + continue; + } + else + ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]); + + /* Re-set iovecs if present. */ + if (tm->iovecs) + _vec_len (tm->iovecs) = 0; + + /* VLIB buffer chain -> Unix iovec(s). 
*/ + vec_add2 (tm->iovecs, iov, 1); + iov->iov_base = b->data + b->current_data; + iov->iov_len = l = b->current_length; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + do { + b = vlib_get_buffer (vm, b->next_buffer); + + vec_add2 (tm->iovecs, iov, 1); + + iov->iov_base = b->data + b->current_data; + iov->iov_len = b->current_length; + l += b->current_length; + } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } + + if (writev (ti->unix_fd, tm->iovecs, vec_len (tm->iovecs)) < l) + clib_unix_warning ("writev"); + } + + /* interface output path flattens buffer chains */ + vlib_buffer_free_no_next (vm, buffers, n_packets); + + return n_packets; +} + +VLIB_REGISTER_NODE (tapcli_tx_node,static) = { + .function = tapcli_tx, + .name = "tapcli-tx", + .type = VLIB_NODE_TYPE_INTERNAL, + .vector_size = 4, +}; + +enum { + TAPCLI_RX_NEXT_IP4_INPUT, + TAPCLI_RX_NEXT_IP6_INPUT, + TAPCLI_RX_NEXT_ETHERNET_INPUT, + TAPCLI_RX_NEXT_DROP, + TAPCLI_RX_N_NEXT, +}; + +static uword +tapcli_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tapcli_main_t * tm = &tapcli_main; + vlib_buffer_t * b; + u32 bi; +#if DPDK == 0 + const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + u32 free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX; +#else + dpdk_main_t * dm = &dpdk_main; + const uword buffer_size = MBUF_SIZE; + u32 free_list_index = dm->vlib_buffer_free_list_index; +#endif + static u32 * ready_interface_indices; + tapcli_interface_t * ti; + int i; + + vec_reset_length (ready_interface_indices); + + clib_bitmap_foreach (i, tm->pending_read_bitmap, + ({ + vec_add1 (ready_interface_indices, i); + })); + + if (vec_len (ready_interface_indices) == 0) + return 1; + + for (i = 0; i < vec_len(ready_interface_indices); i++) + { + /* Clear the "interrupt" bit */ + tm->pending_read_bitmap = + clib_bitmap_set (tm->pending_read_bitmap, + ready_interface_indices[i], 0); + + ti = vec_elt_at_index (tm->tapcli_interfaces, ready_interface_indices[i]); + + /* Make sure we have some RX buffers. */ + { + uword n_left = vec_len (tm->rx_buffers); + uword n_alloc; + + if (n_left < VLIB_FRAME_SIZE / 2) + { + if (! tm->rx_buffers) + vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE); + + n_alloc = vlib_buffer_alloc_from_free_list + (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left, + free_list_index); + _vec_len (tm->rx_buffers) = n_left + n_alloc; + } + } + + /* Allocate RX buffers from end of rx_buffers. + Turn them into iovecs to pass to readv. */ + { + uword i_rx = vec_len (tm->rx_buffers) - 1; + vlib_buffer_t * b; + word j, n_bytes_left, n_bytes_in_packet; +#if DPDK == 1 + u8 out_of_dpdk_buffers = 0; +#endif + + /* We need enough buffers left for an MTU sized packet. 
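+ * tm->mtu_buffers is (mtu_bytes + buffer_size - 1) / buffer_size as computed + * in tapcli_config (), i.e. enough default-size rx buffers for readv () to + * scatter one MTU-sized packet across.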
*/ + if (PREDICT_FALSE(vec_len (tm->rx_buffers) < tm->mtu_buffers)) + { + clib_bitmap_set (tm->pending_read_bitmap, + ready_interface_indices[i], 1); + clib_warning ("buffer allocation failure"); + continue; + } + + vec_validate (tm->iovecs, tm->mtu_buffers - 1); + for (j = 0; j < tm->mtu_buffers; j++) + { + b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]); + tm->iovecs[j].iov_base = b->data; + tm->iovecs[j].iov_len = buffer_size; + } + +#if DPDK == 1 + if (PREDICT_FALSE(out_of_dpdk_buffers == 1)) + continue; +#endif + + n_bytes_left = readv (ti->unix_fd, tm->iovecs, tm->mtu_buffers); + n_bytes_in_packet = n_bytes_left; + if (n_bytes_left <= 0) + { + if (errno != EAGAIN) + clib_unix_warning ("readv %d", n_bytes_left); + return 0; + } + + bi = tm->rx_buffers[i_rx]; + while (1) + { + b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); + + b->flags = 0; + b->current_data = 0; + b->current_length = n_bytes_left < buffer_size + ? n_bytes_left : buffer_size; + + n_bytes_left -= buffer_size; + + if (n_bytes_left <= 0) + { +#if DPDK == 1 + struct rte_mbuf *mb = (struct rte_mbuf *)(b - 1); + rte_pktmbuf_data_len (mb) = n_bytes_in_packet; + rte_pktmbuf_pkt_len (mb) = n_bytes_in_packet; +#endif + break; + } + + i_rx--; + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + b->next_buffer = tm->rx_buffers[i_rx]; +#if DPDK == 1 + ASSERT(0); /* $$$$ fixme */ + /* ((struct rte_pktmbuf *)(b->mb))->next = + vlib_get_buffer (vm, tm->rx_buffers[i_rx])->mb; */ +#endif + } + + /* Interface counters for tapcli interface. */ + vlib_increment_combined_counter + (vnet_main.interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number(), + ti->sw_if_index, + 1, n_bytes_in_packet); + + _vec_len (tm->rx_buffers) = i_rx; + } + + b = vlib_get_buffer (vm, bi); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See .../vlib/vlib/buffer.h + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b); + + { + u32 next_index; + uword n_trace = vlib_get_trace_count (vm, node); + + vnet_buffer (b)->sw_if_index[VLIB_RX] = ti->sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0; + + b->error = node->errors[0]; + + { + next_index = TAPCLI_RX_NEXT_ETHERNET_INPUT; + + next_index = (ti->per_interface_next_index != ~0) ? + ti->per_interface_next_index : next_index; + } + { + vnet_main_t *vnm = vnet_get_main(); + vnet_sw_interface_t * si; + si = vnet_get_sw_interface (vnm, ti->sw_if_index); + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + next_index = TAPCLI_RX_NEXT_DROP; + } + + + vlib_set_next_frame_buffer (vm, node, next_index, bi); + + if (n_trace > 0) + { + vlib_trace_buffer (vm, node, next_index, + b, /* follow_chain */ 1); + vlib_set_trace_count (vm, node, n_trace - 1); + } + } + } + + return 1; +} + +static char * tapcli_rx_error_strings[] = { + "Interface down", +}; + +VLIB_REGISTER_NODE (tapcli_rx_node,static) = { + .function = tapcli_rx, + .name = "tapcli-rx", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = 4, + .n_errors = 1, + .error_strings = tapcli_rx_error_strings, + + .n_next_nodes = TAPCLI_RX_N_NEXT, + .next_nodes = { + [TAPCLI_RX_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [TAPCLI_RX_NEXT_IP6_INPUT] = "ip6-input", + [TAPCLI_RX_NEXT_DROP] = "error-drop", + [TAPCLI_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", + }, +}; + +/* Gets called when file descriptor is ready from epoll. 
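+ * The handler below only schedules work: it raises the tapcli-rx interrupt + * node and marks the ready interface in pending_read_bitmap; the actual + * readv () happens later in tapcli_rx ().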
*/ +static clib_error_t * tapcli_read_ready (unix_file_t * uf) +{ + vlib_main_t * vm = vlib_get_main(); + tapcli_main_t * tm = &tapcli_main; + uword * p; + + /* Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, tapcli_rx_node.index); + + p = hash_get (tm->tapcli_interface_index_by_unix_fd, uf->file_descriptor); + + /* Mark the specific tap interface ready-to-read */ + if (p) + tm->pending_read_bitmap = clib_bitmap_set (tm->pending_read_bitmap, + p[0], 1); + else + clib_warning ("fd %d not in hash table", uf->file_descriptor); + + return 0; +} + +static clib_error_t * +tapcli_config (vlib_main_t * vm, unformat_input_t * input) +{ + tapcli_main_t *tm = &tapcli_main; +#if DPDK == 0 + const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; +#else + const uword buffer_size = MBUF_SIZE; +#endif + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mtu %d", &tm->mtu_bytes)) + ; + else if (unformat (input, "disable")) + tm->is_disabled = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (tm->is_disabled) + return 0; + + if (geteuid()) + { + clib_warning ("tapcli disabled: must be superuser"); + tm->is_disabled = 1; + return 0; + } + + tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size; + + return 0; +} + +static int tap_name_renumber (vnet_hw_interface_t * hi, + u32 new_dev_instance) +{ + tapcli_main_t *tm = &tapcli_main; + + vec_validate_init_empty (tm->show_dev_instance_by_real_dev_instance, + hi->dev_instance, ~0); + + tm->show_dev_instance_by_real_dev_instance [hi->dev_instance] = + new_dev_instance; + + return 0; +} + +VLIB_CONFIG_FUNCTION (tapcli_config, "tapcli"); + +static void +tapcli_nopunt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + vlib_buffer_free (vm, buffers, n_packets); + vlib_frame_free (vm, node, frame); +} + +VNET_HW_INTERFACE_CLASS (tapcli_interface_class,static) = { + .name = "tapcli", +}; + +static u8 * format_tapcli_interface_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u32 show_dev_instance = ~0; + tapcli_main_t * tm = &tapcli_main; + + if (i < vec_len (tm->show_dev_instance_by_real_dev_instance)) + show_dev_instance = tm->show_dev_instance_by_real_dev_instance[i]; + + if (show_dev_instance != ~0) + i = show_dev_instance; + + s = format (s, "tap-%d", i); + return s; +} + +static u32 tapcli_flag_change (vnet_main_t * vnm, + vnet_hw_interface_t * hw, + u32 flags) +{ + tapcli_main_t *tm = &tapcli_main; + tapcli_interface_t *ti; + struct ifreq ifr; + u32 want_promisc; + + ti = vec_elt_at_index (tm->tapcli_interfaces, hw->dev_instance); + + memcpy (&ifr, &ti->ifr, sizeof (ifr)); + + /* get flags, modify to bring up interface... */ + if (ioctl (ti->provision_fd, SIOCGIFFLAGS, &ifr) < 0) + { + clib_unix_warning ("Couldn't get interface flags for %s", hw->name); + return 0; + } + + want_promisc = (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0; + + if (want_promisc == ti->is_promisc) + return 0; + + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + ifr.ifr_flags |= IFF_PROMISC; + else + ifr.ifr_flags &= ~(IFF_PROMISC); + + /* get flags, modify to bring up interface... 
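tapcli_flag_change mirrors the graph-side "accept all" setting into the kernel by reading the interface flags, flipping IFF_PROMISC, and writing them back. A standalone sketch of that read-modify-write, assuming any AF_INET control socket and a hypothetical interface name:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

/* Turn promiscuous mode on or off for 'ifname'.  Returns 0 on success. */
static int
set_promisc (const char *ifname, int enable)
{
  struct ifreq ifr;
  int fd = socket (AF_INET, SOCK_DGRAM, 0);  /* any socket works for these ioctls */
  if (fd < 0)
    return -1;

  memset (&ifr, 0, sizeof (ifr));
  strncpy (ifr.ifr_name, ifname, IFNAMSIZ - 1);

  if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)    /* read current flags */
    { close (fd); return -1; }

  if (enable)
    ifr.ifr_flags |= IFF_PROMISC;
  else
    ifr.ifr_flags &= ~IFF_PROMISC;

  if (ioctl (fd, SIOCSIFFLAGS, &ifr) < 0)    /* write them back */
    { close (fd); return -1; }

  close (fd);
  return 0;
}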
*/ + if (ioctl (ti->provision_fd, SIOCSIFFLAGS, &ifr) < 0) + { + clib_unix_warning ("Couldn't set interface flags for %s", hw->name); + return 0; + } + + ti->is_promisc = want_promisc; + + return 0; +} + +static void tapcli_set_interface_next_node (vnet_main_t *vnm, + u32 hw_if_index, + u32 node_index) +{ + tapcli_main_t *tm = &tapcli_main; + tapcli_interface_t *ti; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + + ti = vec_elt_at_index (tm->tapcli_interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + ti->per_interface_next_index = node_index; + return; + } + + ti->per_interface_next_index = + vlib_node_add_next (tm->vlib_main, tapcli_rx_node.index, node_index); +} + +/* + * Mainly exists to set link_state == admin_state + * otherwise, e.g. ip6 neighbor discovery breaks + */ +static clib_error_t * +tapcli_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + uword is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + u32 hw_flags; + u32 speed_duplex = VNET_HW_INTERFACE_FLAG_FULL_DUPLEX + | VNET_HW_INTERFACE_FLAG_SPEED_1G; + + if (is_admin_up) + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP | speed_duplex; + else + hw_flags = speed_duplex; + + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + return 0; +} + +VNET_DEVICE_CLASS (tapcli_dev_class,static) = { + .name = "tapcli", + .tx_function = tapcli_tx, + .format_device_name = format_tapcli_interface_name, + .rx_redirect_to_node = tapcli_set_interface_next_node, + .name_renumber = tap_name_renumber, + .admin_up_down_function = tapcli_interface_admin_up_down, +}; + +int vnet_tap_dump_ifs (tapcli_interface_details_t **out_tapids) +{ + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t * ti; + + tapcli_interface_details_t * r_tapids = NULL; + tapcli_interface_details_t * tapid = NULL; + + vec_foreach (ti, tm->tapcli_interfaces) { + if (!ti->active) + continue; + vec_add2(r_tapids, tapid, 1); + tapid->sw_if_index = ti->sw_if_index; + strncpy((char *)tapid->dev_name, ti->ifr.ifr_name, sizeof (ti->ifr.ifr_name)-1); + } + + *out_tapids = r_tapids; + + return 0; +} + +/* get tap interface from inactive interfaces or create new */ +static tapcli_interface_t *tapcli_get_new_tapif() +{ + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t *ti = NULL; + + int inactive_cnt = vec_len(tm->tapcli_inactive_interfaces); + // if there are any inactive ifaces + if (inactive_cnt > 0) { + // take last + u32 ti_idx = tm->tapcli_inactive_interfaces[inactive_cnt - 1]; + if (vec_len(tm->tapcli_interfaces) > ti_idx) { + ti = vec_elt_at_index (tm->tapcli_interfaces, ti_idx); + clib_warning("reusing tap interface"); + } + // "remove" from inactive list + _vec_len(tm->tapcli_inactive_interfaces) -= 1; + } + + // ti was not retrieved from inactive ifaces - create new + if (!ti) + vec_add2 (tm->tapcli_interfaces, ti, 1); + + return ti; +} + +int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, + u32 * sw_if_indexp) +{ + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t * ti = NULL; + struct ifreq ifr; + int flags; + int dev_net_tun_fd; + int dev_tap_fd = -1; + clib_error_t * error; + u8 hwaddr [6]; + int rv = 0; + + if (tm->is_disabled) + { + return VNET_API_ERROR_FEATURE_DISABLED; + } + + flags = IFF_TAP | IFF_NO_PI; + + if ((dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + memset (&ifr, 0, sizeof (ifr)); + strncpy(ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1); + ifr.ifr_flags 
= flags; + if (ioctl (dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + + /* Open a provisioning socket */ + if ((dev_tap_fd = socket(PF_PACKET, SOCK_RAW, + htons(ETH_P_ALL))) < 0 ) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + + /* Find the interface index. */ + { + struct ifreq ifr; + struct sockaddr_ll sll; + + memset (&ifr, 0, sizeof(ifr)); + strncpy (ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1); + if (ioctl (dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 ) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_4; + goto error; + } + + /* Bind the provisioning socket to the interface. */ + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = ifr.ifr_ifindex; + sll.sll_protocol = htons(ETH_P_ALL); + + if (bind(dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_5; + goto error; + } + } + + /* non-blocking I/O on /dev/tapX */ + { + int one = 1; + if (ioctl (dev_net_tun_fd, FIONBIO, &one) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_6; + goto error; + } + } + ifr.ifr_mtu = tm->mtu_bytes; + if (ioctl (dev_tap_fd, SIOCSIFMTU, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_7; + goto error; + } + + /* get flags, modify to bring up interface... */ + if (ioctl (dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_8; + goto error; + } + + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + + if (ioctl (dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_9; + goto error; + } + + if (ioctl (dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + ti = tapcli_get_new_tapif(); + + if (hwaddr_arg != 0) + memcpy(hwaddr, hwaddr_arg, 6); + + error = ethernet_register_interface + (tm->vnet_main, + tapcli_dev_class.index, + ti - tm->tapcli_interfaces /* device instance */, + hwaddr_arg != 0 ? 
hwaddr : + (u8 *) ifr.ifr_hwaddr.sa_data /* ethernet address */, + &ti->hw_if_index, + tapcli_flag_change); + + if (error) + { + clib_error_report (error); + rv = VNET_API_ERROR_INVALID_REGISTRATION; + goto error; + } + + { + unix_file_t template = {0}; + template.read_function = tapcli_read_ready; + template.file_descriptor = dev_net_tun_fd; + ti->unix_file_index = unix_file_add (&unix_main, &template); + ti->unix_fd = dev_net_tun_fd; + ti->provision_fd = dev_tap_fd; + memcpy (&ti->ifr, &ifr, sizeof (ifr)); + } + + { + vnet_hw_interface_t * hw; + hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index); + ti->sw_if_index = hw->sw_if_index; + if (sw_if_indexp) + *sw_if_indexp = hw->sw_if_index; + } + + ti->active = 1; + + hash_set (tm->tapcli_interface_index_by_sw_if_index, ti->sw_if_index, + ti - tm->tapcli_interfaces); + + hash_set (tm->tapcli_interface_index_by_unix_fd, ti->unix_fd, + ti - tm->tapcli_interfaces); + + return rv; + + error: + close (dev_net_tun_fd); + close (dev_tap_fd); + + return rv; +} + +int vnet_tap_connect_renumber (vlib_main_t * vm, u8 * intfc_name, + u8 *hwaddr_arg, u32 * sw_if_indexp, + u8 renumber, u32 custom_dev_instance) +{ + int rv = vnet_tap_connect(vm, intfc_name, hwaddr_arg, sw_if_indexp); + + if (!rv && renumber) + vnet_interface_name_renumber (*sw_if_indexp, custom_dev_instance); + + return rv; +} + +static int tapcli_tap_disconnect (tapcli_interface_t *ti) +{ + int rv = 0; + vnet_main_t * vnm = vnet_get_main(); + tapcli_main_t * tm = &tapcli_main; + u32 sw_if_index = ti->sw_if_index; + + // bring interface down + vnet_sw_interface_set_flags (vnm, sw_if_index, 0); + + if (ti->unix_file_index != ~0) { + unix_file_del (&unix_main, unix_main.file_pool + ti->unix_file_index); + ti->unix_file_index = ~0; + } + + hash_unset (tm->tapcli_interface_index_by_unix_fd, ti->unix_fd); + hash_unset (tm->tapcli_interface_index_by_sw_if_index, ti->sw_if_index); + close(ti->unix_fd); + close(ti->provision_fd); + ti->unix_fd = -1; + ti->provision_fd = -1; + + return rv; +} + +int vnet_tap_delete(vlib_main_t *vm, u32 sw_if_index) +{ + int rv = 0; + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t *ti; + uword *p = NULL; + + p = hash_get (tm->tapcli_interface_index_by_sw_if_index, + sw_if_index); + if (p == 0) { + clib_warning ("sw_if_index %d unknown", sw_if_index); + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } + ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]); + + // inactive + ti->active = 0; + tapcli_tap_disconnect(ti); + // add to inactive list + vec_add1(tm->tapcli_inactive_interfaces, ti - tm->tapcli_interfaces); + + // reset renumbered iface + if (p[0] < vec_len (tm->show_dev_instance_by_real_dev_instance)) + tm->show_dev_instance_by_real_dev_instance[p[0]] = ~0; + + ethernet_delete_interface (tm->vnet_main, ti->hw_if_index); + return rv; +} + +static clib_error_t * +tap_delete_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + tapcli_main_t * tm = &tapcli_main; + u32 sw_if_index = ~0; + + if (tm->is_disabled) + { + return clib_error_return (0, "device disabled..."); + } + + if (unformat (input, "%U", unformat_vnet_sw_interface, tm->vnet_main, + &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + + int rc = vnet_tap_delete (vm, sw_if_index); + + if (!rc) { + vlib_cli_output (vm, "Deleted."); + } else { + vlib_cli_output (vm, "Error during deletion of tap interface. 
(rc: %d)", rc); + } + + return 0; +} + +VLIB_CLI_COMMAND (tap_delete_command, static) = { + .path = "tap delete", + .short_help = "tap delete <vpp-tap-intfc-name>", + .function = tap_delete_command_fn, +}; + +/* modifies tap interface - can result in new interface being created */ +int vnet_tap_modify (vlib_main_t * vm, u32 orig_sw_if_index, + u8 * intfc_name, u8 *hwaddr_arg, + u32 * sw_if_indexp, + u8 renumber, u32 custom_dev_instance) +{ + int rv = vnet_tap_delete (vm, orig_sw_if_index); + + if (rv) + return rv; + + rv = vnet_tap_connect_renumber(vm, intfc_name, hwaddr_arg, sw_if_indexp, + renumber, custom_dev_instance); + + return rv; +} + +static clib_error_t * +tap_modify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 * intfc_name; + tapcli_main_t * tm = &tapcli_main; + u32 sw_if_index = ~0; + u32 new_sw_if_index = ~0; + int user_hwaddr = 0; + u8 hwaddr[6]; + + if (tm->is_disabled) + { + return clib_error_return (0, "device disabled..."); + } + + if (unformat (input, "%U", unformat_vnet_sw_interface, tm->vnet_main, + &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + if (unformat (input, "%s", &intfc_name)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + if (unformat(input, "hwaddr %U", unformat_ethernet_address, + &hwaddr)) + user_hwaddr = 1; + + + int rc = vnet_tap_modify (vm, sw_if_index, intfc_name, + (user_hwaddr == 1 ? hwaddr : 0), + &new_sw_if_index, 0, 0); + + if (!rc) { + vlib_cli_output (vm, "Modified %U for Linux tap '%s'", + format_vnet_sw_if_index_name, tm->vnet_main, + new_sw_if_index, intfc_name); + } else { + vlib_cli_output (vm, "Error during modification of tap interface. (rc: %d)", rc); + } + + return 0; +} + +VLIB_CLI_COMMAND (tap_modify_command, static) = { + .path = "tap modify", + .short_help = "tap modify <vpp-tap-intfc-name> <linux-intfc-name> [hwaddr [<addr> | random]]", + .function = tap_modify_command_fn, +}; + +static clib_error_t * +tap_connect_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 * intfc_name; + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t * ti; + struct ifreq ifr; + int flags; + int dev_net_tun_fd; + int dev_tap_fd = -1; + clib_error_t * error; + int user_hwaddr = 0; + u8 hwaddr[6]; + + if (tm->is_disabled) + { + return clib_error_return (0, "device disabled..."); + } + + if (unformat (input, "%s", &intfc_name)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + if (unformat(input, "hwaddr %U", unformat_ethernet_address, + &hwaddr)) + user_hwaddr = 1; + + flags = IFF_TAP | IFF_NO_PI; + + if ((dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0) + { + vlib_cli_output (vm, "Couldn't open /dev/net/tun"); + return 0; + } + + memset (&ifr, 0, sizeof (ifr)); + strncpy(ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1); + ifr.ifr_flags = flags; + if (ioctl (dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0) + { + vlib_cli_output (vm, "Error setting flags on '%s'", intfc_name); + goto error; + } + + /* Open a provisioning socket */ + if ((dev_tap_fd = socket(PF_PACKET, SOCK_RAW, + htons(ETH_P_ALL))) < 0 ) + { + vlib_cli_output (vm, "Couldn't open provisioning socket"); + goto error; + } + + /* Find the interface index. 
*/ + { + struct ifreq ifr; + struct sockaddr_ll sll; + + memset (&ifr, 0, sizeof(ifr)); + strncpy (ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1); + if (ioctl (dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 ) + { + vlib_cli_output (vm, "Couldn't get if_index"); + goto error; + } + + /* Bind the provisioning socket to the interface. */ + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = ifr.ifr_ifindex; + sll.sll_protocol = htons(ETH_P_ALL); + + if (bind(dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0) + { + vlib_cli_output (vm, "Couldn't bind provisioning socket"); + goto error; + } + } + + /* non-blocking I/O on /dev/tapX */ + { + int one = 1; + if (ioctl (dev_net_tun_fd, FIONBIO, &one) < 0) + { + vlib_cli_output (0, "Couldn't set device non-blocking flag"); + goto error; + } + } + ifr.ifr_mtu = tm->mtu_bytes; + if (ioctl (dev_tap_fd, SIOCSIFMTU, &ifr) < 0) + { + vlib_cli_output (0, "Couldn't set device MTU"); + goto error; + } + + /* get flags, modify to bring up interface... */ + if (ioctl (dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) + { + vlib_cli_output (0, "Couldn't get interface flags"); + goto error; + } + + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + + if (ioctl (dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) + { + vlib_cli_output (0, "Couldn't set intfc admin state up"); + goto error; + } + + if (ioctl (dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0) + { + vlib_cli_output (0, "Couldn't get intfc MAC address"); + goto error; + } + + ti = tapcli_get_new_tapif(); + ti->per_interface_next_index = ~0; + + if (unformat(input, "hwaddr random")) + { + f64 now = vlib_time_now(vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (hwaddr+2, &rnd, sizeof(rnd)); + hwaddr[0] = 2; + hwaddr[1] = 0xfe; + user_hwaddr = 1; + } + + error = ethernet_register_interface + (tm->vnet_main, + tapcli_dev_class.index, + ti - tm->tapcli_interfaces /* device instance */, + user_hwaddr ? 
hwaddr : + (u8 *) ifr.ifr_hwaddr.sa_data /* ethernet address */, + &ti->hw_if_index, + tapcli_flag_change); + + if (error) + clib_error_report (error); + + { + unix_file_t template = {0}; + template.read_function = tapcli_read_ready; + template.file_descriptor = dev_net_tun_fd; + ti->unix_file_index = unix_file_add (&unix_main, &template); + ti->unix_fd = dev_net_tun_fd; + ti->provision_fd = dev_tap_fd; + memcpy (&ti->ifr, &ifr, sizeof (ifr)); + } + + { + vnet_hw_interface_t * hw; + hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index); + ti->sw_if_index = hw->sw_if_index; + } + + ti->active = 1; + + hash_set (tm->tapcli_interface_index_by_sw_if_index, ti->sw_if_index, + ti - tm->tapcli_interfaces); + + hash_set (tm->tapcli_interface_index_by_unix_fd, ti->unix_fd, + ti - tm->tapcli_interfaces); + + vlib_cli_output (vm, "Created %U for Linux tap '%s'", + format_vnet_sw_if_index_name, tm->vnet_main, + ti->sw_if_index, intfc_name); + + return 0; + + error: + close (dev_net_tun_fd); + close (dev_tap_fd); + + return 0; +} + +VLIB_CLI_COMMAND (tap_connect_command, static) = { + .path = "tap connect", + .short_help = "tap connect <intfc-name> [hwaddr [<addr> | random]]", + .function = tap_connect_command_fn, +}; + +clib_error_t * +tapcli_init (vlib_main_t * vm) +{ + tapcli_main_t * tm = &tapcli_main; + + tm->vlib_main = vm; + tm->vnet_main = vnet_get_main(); + tm->unix_main = &unix_main; + tm->mtu_bytes = 4096 + 256; + tm->tapcli_interface_index_by_sw_if_index = hash_create (0, sizeof(uword)); + tm->tapcli_interface_index_by_unix_fd = hash_create (0, sizeof (uword)); + vm->os_punt_frame = tapcli_nopunt_frame; + + return 0; +} + +VLIB_INIT_FUNCTION (tapcli_init); + + diff --git a/vnet/vnet/unix/tapcli.h b/vnet/vnet/unix/tapcli.h new file mode 100644 index 00000000000..1f5f4c3ee73 --- /dev/null +++ b/vnet/vnet/unix/tapcli.h @@ -0,0 +1,29 @@ +/* + * tapcli.h : tap support + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_tapcli_h__ +#define __included_tapcli_h__ + + +typedef struct { + u32 sw_if_index; + u8 dev_name[64]; +} tapcli_interface_details_t; + +int vnet_tap_dump_ifs (tapcli_interface_details_t **out_tapids); + +#endif /* __included_tapcli_h__ */ diff --git a/vnet/vnet/unix/tuntap.c b/vnet/vnet/unix/tuntap.c new file mode 100644 index 00000000000..77c60fd6ee2 --- /dev/null +++ b/vnet/vnet/unix/tuntap.c @@ -0,0 +1,907 @@ +/* + *------------------------------------------------------------------ + * tuntap.c - kernel stack (reverse) punt/inject path + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <fcntl.h> /* for open */ +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/uio.h> /* for iovec */ +#include <netinet/in.h> + +#include <linux/if_arp.h> +#include <linux/if_tun.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <vnet/ip/ip.h> + +#include <vnet/ethernet/ethernet.h> + +#if DPDK == 1 +#include <vnet/devices/dpdk/dpdk.h> +#endif + +static vnet_device_class_t tuntap_dev_class; +static vnet_hw_interface_class_t tuntap_interface_class; + +static void tuntap_punt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); +static void tuntap_nopunt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); + +/* + * This driver runs in one of two distinct modes: + * "punt/inject" mode, where we send pkts not otherwise processed + * by the forwarding to the Linux kernel stack, and + * "normal interface" mode, where we treat the Linux kernel stack + * as a peer. + * + * By default, we select punt/inject mode. + */ + +typedef struct { + u32 sw_if_index; + u8 is_v6; + u8 addr[16]; +} subif_address_t; + +typedef struct { + /* Vector of iovecs for readv/writev calls. */ + struct iovec * iovecs; + + /* Vector of VLIB rx buffers to use. We allocate them in blocks + of VLIB_FRAME_SIZE (256). */ + u32 * rx_buffers; + + /* File descriptors for /dev/net/tun and provisioning socket. */ + int dev_net_tun_fd, dev_tap_fd; + + /* Create a "tap" [ethernet] encaps device */ + int is_ether; + + /* 1 if a "normal" routed intfc, 0 if a punt/inject interface */ + + int have_normal_interface; + + /* tap device destination MAC address. Required, or Linux drops pkts */ + u8 ether_dst_mac[6]; + + /* Interface MTU in bytes and # of default sized buffers. */ + u32 mtu_bytes, mtu_buffers; + + /* Linux interface name for tun device. */ + char * tun_name; + + /* Pool of subinterface addresses */ + subif_address_t *subifs; + + /* Hash for subif addresses */ + mhash_t subif_mhash; + + u32 unix_file_index; + + /* For the "normal" interface, if configured */ + u32 hw_if_index, sw_if_index; + +} tuntap_main_t; + +static tuntap_main_t tuntap_main = { + .tun_name = "vnet", + + /* Suitable defaults for an Ethernet-like tun/tap device */ + .mtu_bytes = 4096 + 256, +}; + +/* + * tuntap_tx + * Output node, writes the buffers comprising the incoming frame + * to the tun/tap device, aka hands them to the Linux kernel stack. + * + */ +static uword +tuntap_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + tuntap_main_t * tm = &tuntap_main; + int i; + + for (i = 0; i < n_packets; i++) + { + struct iovec * iov; + vlib_buffer_t * b; + uword l; + + b = vlib_get_buffer (vm, buffers[i]); + + if (tm->is_ether && (!tm->have_normal_interface)) + { + vlib_buffer_reset(b); + memcpy (vlib_buffer_get_current (b), tm->ether_dst_mac, 6); + } + + /* Re-set iovecs if present. 
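tuntap_tx below turns each (possibly chained) vlib buffer into one iovec and hands the whole packet to the kernel stack in a single writev(). A sketch of the same gather-write over a plain linked list of segments, using hypothetical types in place of vlib buffers:

#include <stddef.h>
#include <sys/types.h>
#include <sys/uio.h>

/* Hypothetical segment chain standing in for a vlib buffer chain. */
struct seg
{
  unsigned char *data;
  size_t len;
  struct seg *next;             /* NULL on the last segment */
};

/* Write one packet (the whole chain) to 'fd' with a single gather write.
   Returns the number of bytes written, or -1 on error. */
static ssize_t
write_chain (int fd, const struct seg *s)
{
  struct iovec iov[16];         /* assumed upper bound on chain length */
  int n = 0;

  for (; s != NULL && n < 16; s = s->next)
    {
      iov[n].iov_base = s->data;
      iov[n].iov_len = s->len;
      n++;
    }
  return writev (fd, iov, n);
}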
*/ + if (tm->iovecs) + _vec_len (tm->iovecs) = 0; + + /* VLIB buffer chain -> Unix iovec(s). */ + vec_add2 (tm->iovecs, iov, 1); + iov->iov_base = b->data + b->current_data; + iov->iov_len = l = b->current_length; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + do { + b = vlib_get_buffer (vm, b->next_buffer); + + vec_add2 (tm->iovecs, iov, 1); + + iov->iov_base = b->data + b->current_data; + iov->iov_len = b->current_length; + l += b->current_length; + } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } + + if (writev (tm->dev_net_tun_fd, tm->iovecs, vec_len (tm->iovecs)) < l) + clib_unix_warning ("writev"); + } + + /* The normal interface path flattens the buffer chain */ + if (tm->have_normal_interface) + vlib_buffer_free_no_next (vm, buffers, n_packets); + else + vlib_buffer_free (vm, buffers, n_packets); + + return n_packets; +} + +VLIB_REGISTER_NODE (tuntap_tx_node,static) = { + .function = tuntap_tx, + .name = "tuntap-tx", + .type = VLIB_NODE_TYPE_INTERNAL, + .vector_size = 4, +}; + +enum { + TUNTAP_RX_NEXT_IP4_INPUT, + TUNTAP_RX_NEXT_IP6_INPUT, + TUNTAP_RX_NEXT_ETHERNET_INPUT, + TUNTAP_RX_NEXT_DROP, + TUNTAP_RX_N_NEXT, +}; + +static uword +tuntap_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tuntap_main_t * tm = &tuntap_main; + vlib_buffer_t * b; + u32 bi; +#if DPDK == 0 + const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + u32 free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX; +#else + dpdk_main_t * dm = &dpdk_main; + const uword buffer_size = MBUF_SIZE; + u32 free_list_index = dm->vlib_buffer_free_list_index; +#endif + + /* Make sure we have some RX buffers. */ + { + uword n_left = vec_len (tm->rx_buffers); + uword n_alloc; + + if (n_left < VLIB_FRAME_SIZE / 2) + { + if (! tm->rx_buffers) + vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE); + + n_alloc = vlib_buffer_alloc_from_free_list + (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left, + free_list_index); + _vec_len (tm->rx_buffers) = n_left + n_alloc; + } + } + + /* Allocate RX buffers from end of rx_buffers. + Turn them into iovecs to pass to readv. */ + { + uword i_rx = vec_len (tm->rx_buffers) - 1; + vlib_buffer_t * b; + word i, n_bytes_left, n_bytes_in_packet; + + /* We should have enough buffers left for an MTU sized packet. */ + ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers); + + vec_validate (tm->iovecs, tm->mtu_buffers - 1); + for (i = 0; i < tm->mtu_buffers; i++) + { + b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]); + tm->iovecs[i].iov_base = b->data; + tm->iovecs[i].iov_len = buffer_size; + } + + n_bytes_left = readv (tm->dev_net_tun_fd, tm->iovecs, tm->mtu_buffers); + n_bytes_in_packet = n_bytes_left; + if (n_bytes_left <= 0) + { + if (errno != EAGAIN) + clib_unix_warning ("readv %d", n_bytes_left); + return 0; + } + + bi = tm->rx_buffers[i_rx]; + + while (1) + { +#if DPDK == 1 + struct rte_mbuf * mb; +#endif + b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); +#if DPDK == 1 + mb = (((struct rte_mbuf *)b)-1); +#endif + b->flags = 0; + b->current_data = 0; + b->current_length = n_bytes_left < buffer_size ? 
n_bytes_left : buffer_size; + + n_bytes_left -= buffer_size; +#if DPDK == 1 + rte_pktmbuf_data_len (mb) = b->current_length; +#endif + + if (n_bytes_left <= 0) + { +#if DPDK == 1 + rte_pktmbuf_pkt_len (mb) = n_bytes_in_packet; +#endif + break; + } + + i_rx--; + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + b->next_buffer = tm->rx_buffers[i_rx]; +#if DPDK == 1 + ASSERT(0); + // ((struct rte_pktmbuf *)(b->mb))->next = + // vlib_get_buffer (vm, tm->rx_buffers[i_rx])->mb; +#endif + } + + /* Interface counters for tuntap interface. */ + vlib_increment_combined_counter + (vnet_main.interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number(), + tm->sw_if_index, + 1, n_bytes_in_packet); + + _vec_len (tm->rx_buffers) = i_rx; + } + + b = vlib_get_buffer (vm, bi); + + { + u32 next_index; + uword n_trace = vlib_get_trace_count (vm, node); + + vnet_buffer (b)->sw_if_index[VLIB_RX] = tm->sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0; + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... + */ + if (VLIB_BUFFER_TRACE_TRAJECTORY) + b->pre_data[0] = 0; + + b->error = node->errors[0]; + + if (tm->is_ether) + { + next_index = TUNTAP_RX_NEXT_ETHERNET_INPUT; + } + else + switch (b->data[0] & 0xf0) + { + case 0x40: + next_index = TUNTAP_RX_NEXT_IP4_INPUT; + break; + case 0x60: + next_index = TUNTAP_RX_NEXT_IP6_INPUT; + break; + default: + next_index = TUNTAP_RX_NEXT_DROP; + break; + } + + /* The linux kernel couldn't care less if our interface is up */ + if (tm->have_normal_interface) + { + vnet_main_t *vnm = vnet_get_main(); + vnet_sw_interface_t * si; + si = vnet_get_sw_interface (vnm, tm->sw_if_index); + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + next_index = TUNTAP_RX_NEXT_DROP; + } + + vlib_set_next_frame_buffer (vm, node, next_index, bi); + + if (n_trace > 0) + { + vlib_trace_buffer (vm, node, next_index, + b, /* follow_chain */ 1); + vlib_set_trace_count (vm, node, n_trace - 1); + } + } + + return 1; +} + +static char * tuntap_rx_error_strings[] = { + "unknown packet type", +}; + +VLIB_REGISTER_NODE (tuntap_rx_node,static) = { + .function = tuntap_rx, + .name = "tuntap-rx", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = 4, + .n_errors = 1, + .error_strings = tuntap_rx_error_strings, + + .n_next_nodes = TUNTAP_RX_N_NEXT, + .next_nodes = { + [TUNTAP_RX_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [TUNTAP_RX_NEXT_IP6_INPUT] = "ip6-input", + [TUNTAP_RX_NEXT_DROP] = "error-drop", + [TUNTAP_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", + }, +}; + +/* Gets called when file descriptor is ready from epoll. */ +static clib_error_t * tuntap_read_ready (unix_file_t * uf) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index); + return 0; +} + +/* + * tuntap_exit + * Clean up the tun/tap device + */ + +static clib_error_t * +tuntap_exit (vlib_main_t * vm) +{ + tuntap_main_t *tm = &tuntap_main; + struct ifreq ifr; + int sfd; + + /* Not present. */ + if (! tm->dev_net_tun_fd || tm->dev_net_tun_fd < 0) + return 0; + + sfd = socket (AF_INET, SOCK_STREAM, 0); + if (sfd < 0) + clib_unix_warning("provisioning socket"); + + memset(&ifr, 0, sizeof (ifr)); + strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name)-1); + + /* get flags, modify to bring down interface... 
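With IFF_NO_PI there is no protocol-info header on the tun fd, so the rx node above classifies each raw packet by the IP version nibble of its first byte before picking the next graph node. A tiny self-contained illustration of that classification:

#include <stdio.h>

enum next { NEXT_IP4, NEXT_IP6, NEXT_DROP };

/* Classify a raw (headerless) tun packet by its IP version nibble. */
static enum next
classify (const unsigned char *pkt)
{
  switch (pkt[0] & 0xf0)
    {
    case 0x40: return NEXT_IP4;   /* version 4 in the top nibble */
    case 0x60: return NEXT_IP6;   /* version 6 in the top nibble */
    default:   return NEXT_DROP;  /* anything else is unknown */
    }
}

int
main (void)
{
  unsigned char v4_hdr[1] = { 0x45 };   /* version 4, IHL 5 */
  unsigned char v6_hdr[1] = { 0x60 };
  printf ("%d %d\n", classify (v4_hdr), classify (v6_hdr));   /* prints 0 1 */
  return 0;
}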
*/ + if (ioctl (sfd, SIOCGIFFLAGS, &ifr) < 0) + clib_unix_warning ("SIOCGIFFLAGS"); + + ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); + + if (ioctl (sfd, SIOCSIFFLAGS, &ifr) < 0) + clib_unix_warning ("SIOCSIFFLAGS"); + + /* Turn off persistence */ + if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 0) < 0) + clib_unix_warning ("TUNSETPERSIST"); + close(tm->dev_tap_fd); + close(tm->dev_net_tun_fd); + close (sfd); + + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (tuntap_exit); + +static clib_error_t * +tuntap_config (vlib_main_t * vm, unformat_input_t * input) +{ + tuntap_main_t *tm = &tuntap_main; + clib_error_t * error = 0; + struct ifreq ifr; + u8 * name; + int flags = IFF_TUN | IFF_NO_PI; + int is_enabled = 0, is_ether = 0, have_normal_interface = 0; +#if DPDK == 0 + const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; +#else + const uword buffer_size = MBUF_SIZE; +#endif + + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mtu %d", &tm->mtu_bytes)) + ; + else if (unformat (input, "enable")) + is_enabled = 1; + else if (unformat (input, "disable")) + is_enabled = 0; + else if (unformat (input, "ethernet") || + unformat (input, "ether")) + is_ether = 1; + else if (unformat (input, "have-normal-interface") || + unformat (input, "have-normal")) + have_normal_interface = 1; + else if (unformat (input, "name %s", &name)) + tm->tun_name = (char *) name; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + tm->dev_net_tun_fd = -1; + tm->dev_tap_fd = -1; + + if (is_enabled == 0) + return 0; + + if (geteuid()) + { + clib_warning ("tuntap disabled: must be superuser"); + return 0; + } + + tm->is_ether = is_ether; + tm->have_normal_interface = have_normal_interface; + + if (is_ether) + flags = IFF_TAP | IFF_NO_PI; + + if ((tm->dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0) + { + error = clib_error_return_unix (0, "open /dev/net/tun"); + goto done; + } + + memset (&ifr, 0, sizeof (ifr)); + strncpy(ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1); + ifr.ifr_flags = flags; + if (ioctl (tm->dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl TUNSETIFF"); + goto done; + } + + /* Make it persistent, at least until we split. */ + if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 1) < 0) + { + error = clib_error_return_unix (0, "TUNSETPERSIST"); + goto done; + } + + /* Open a provisioning socket */ + if ((tm->dev_tap_fd = socket(PF_PACKET, SOCK_RAW, + htons(ETH_P_ALL))) < 0 ) + { + error = clib_error_return_unix (0, "socket"); + goto done; + } + + /* Find the interface index. */ + { + struct ifreq ifr; + struct sockaddr_ll sll; + + memset (&ifr, 0, sizeof(ifr)); + strncpy (ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1); + if (ioctl (tm->dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 ) + { + error = clib_error_return_unix (0, "ioctl SIOCGIFINDEX"); + goto done; + } + + /* Bind the provisioning socket to the interface. 
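The config handler above creates the kernel side of the device by opening /dev/net/tun, naming it with TUNSETIFF, and keeping it across process restarts with TUNSETPERSIST. A minimal sketch of that sequence, assuming a layer-3 (IFF_TUN) device and no error reporting beyond the return value:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/if_tun.h>
#include <unistd.h>

/* Create (or attach to) a tun device named 'name'.  Returns the fd or -1. */
static int
open_tun (const char *name, int persist)
{
  struct ifreq ifr;

  int fd = open ("/dev/net/tun", O_RDWR);
  if (fd < 0)
    return -1;

  memset (&ifr, 0, sizeof (ifr));
  ifr.ifr_flags = IFF_TUN | IFF_NO_PI;   /* raw IP packets, no protocol-info header */
  strncpy (ifr.ifr_name, name, IFNAMSIZ - 1);

  if (ioctl (fd, TUNSETIFF, (void *) &ifr) < 0)
    { close (fd); return -1; }

  /* Optionally keep the device around after this fd is closed. */
  if (persist && ioctl (fd, TUNSETPERSIST, 1) < 0)
    { close (fd); return -1; }

  return fd;
}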
*/ + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = ifr.ifr_ifindex; + sll.sll_protocol = htons(ETH_P_ALL); + + if (bind(tm->dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0) + { + error = clib_error_return_unix (0, "bind"); + goto done; + } + } + + /* non-blocking I/O on /dev/tapX */ + { + int one = 1; + if (ioctl (tm->dev_net_tun_fd, FIONBIO, &one) < 0) + { + error = clib_error_return_unix (0, "ioctl FIONBIO"); + goto done; + } + } + + tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size; + + ifr.ifr_mtu = tm->mtu_bytes; + if (ioctl (tm->dev_tap_fd, SIOCSIFMTU, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl SIOCSIFMTU"); + goto done; + } + + /* get flags, modify to bring up interface... */ + if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl SIOCGIFFLAGS"); + goto done; + } + + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + + if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl SIOCSIFFLAGS"); + goto done; + } + + if (is_ether) + { + if (ioctl (tm->dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl SIOCGIFHWADDR"); + goto done; + } + else + memcpy (tm->ether_dst_mac, ifr.ifr_hwaddr.sa_data, 6); + } + + if (have_normal_interface) + { + vnet_main_t *vnm = vnet_get_main(); + error = ethernet_register_interface + (vnm, + tuntap_dev_class.index, + 0 /* device instance */, + tm->ether_dst_mac /* ethernet address */, + &tm->hw_if_index, + 0 /* flag change */); + if (error) + clib_error_report (error); + tm->sw_if_index = tm->hw_if_index; + vm->os_punt_frame = tuntap_nopunt_frame; + } + else + { + vnet_main_t *vnm = vnet_get_main(); + vnet_hw_interface_t * hi; + + vm->os_punt_frame = tuntap_punt_frame; + + tm->hw_if_index = vnet_register_interface + (vnm, + tuntap_dev_class.index, 0 /* device instance */, + tuntap_interface_class.index, 0); + hi = vnet_get_hw_interface (vnm, tm->hw_if_index); + tm->sw_if_index = hi->sw_if_index; + + /* Interface is always up. */ + vnet_hw_interface_set_flags (vnm, tm->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + vnet_sw_interface_set_flags (vnm, tm->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + } + + { + unix_file_t template = {0}; + template.read_function = tuntap_read_ready; + template.file_descriptor = tm->dev_net_tun_fd; + tm->unix_file_index = unix_file_add (&unix_main, &template); + } + + done: + if (error) + { + if (tm->dev_net_tun_fd >= 0) + close (tm->dev_net_tun_fd); + if (tm->dev_tap_fd >= 0) + close (tm->dev_tap_fd); + } + + return error; +} + +VLIB_CONFIG_FUNCTION (tuntap_config, "tuntap"); + +void +tuntap_ip4_add_del_interface_address (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete) +{ + tuntap_main_t * tm = &tuntap_main; + struct ifreq ifr; + subif_address_t subif_addr, * ap; + uword * p; + + /* Tuntap disabled, or using a "normal" interface. 
*/ + if (tm->have_normal_interface || tm->dev_tap_fd < 0) + return; + + /* See if we already know about this subif */ + memset (&subif_addr, 0, sizeof (subif_addr)); + subif_addr.sw_if_index = sw_if_index; + memcpy (&subif_addr.addr, address, sizeof (*address)); + + p = mhash_get (&tm->subif_mhash, &subif_addr); + + if (p) + ap = pool_elt_at_index (tm->subifs, p[0]); + else + { + pool_get (tm->subifs, ap); + *ap = subif_addr; + mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0); + } + + /* Use subif pool index to select alias device. */ + memset (&ifr, 0, sizeof (ifr)); + snprintf (ifr.ifr_name, sizeof(ifr.ifr_name), + "%s:%d", tm->tun_name, (int)(ap - tm->subifs)); + + if (! is_delete) + { + struct sockaddr_in * sin; + + sin = (struct sockaddr_in *)&ifr.ifr_addr; + + /* Set ipv4 address, netmask. */ + sin->sin_family = AF_INET; + memcpy (&sin->sin_addr.s_addr, address, 4); + if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0) + clib_unix_warning ("ioctl SIOCSIFADDR"); + + sin->sin_addr.s_addr = im->fib_masks[address_length]; + if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0) + clib_unix_warning ("ioctl SIOCSIFNETMASK"); + } + else + { + mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */); + pool_put (tm->subifs, ap); + } + + /* get flags, modify to bring up interface... */ + if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) + clib_unix_warning ("ioctl SIOCGIFFLAGS"); + + if (is_delete) + ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); + else + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + + if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) + clib_unix_warning ("ioctl SIOCSIFFLAGS"); +} + +/* + * $$$$ gross workaround for a known #include bug + * #include <linux/ipv6.h> causes multiple definitions if + * netinet/in.h is also included. + */ +struct in6_ifreq { + struct in6_addr ifr6_addr; + u32 ifr6_prefixlen; + int ifr6_ifindex; +}; + +/* + * Both the v6 interface address API and the way ifconfig + * displays subinterfaces differ from their v4 couterparts. + * The code given here seems to work but YMMV. + */ +void +tuntap_ip6_add_del_interface_address (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete) +{ + tuntap_main_t * tm = &tuntap_main; + struct ifreq ifr; + struct in6_ifreq ifr6; + subif_address_t subif_addr, * ap; + uword * p; + + /* Tuntap disabled, or using a "normal" interface. */ + if (tm->have_normal_interface || tm->dev_tap_fd < 0) + return; + + /* See if we already know about this subif */ + memset (&subif_addr, 0, sizeof (subif_addr)); + subif_addr.sw_if_index = sw_if_index; + subif_addr.is_v6 = 1; + memcpy (&subif_addr.addr, address, sizeof (*address)); + + p = mhash_get (&tm->subif_mhash, &subif_addr); + + if (p) + ap = pool_elt_at_index (tm->subifs, p[0]); + else + { + pool_get (tm->subifs, ap); + *ap = subif_addr; + mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0); + } + + /* Use subif pool index to select alias device. */ + memset (&ifr, 0, sizeof (ifr)); + memset (&ifr6, 0, sizeof (ifr6)); + snprintf (ifr.ifr_name, sizeof(ifr.ifr_name), + "%s:%d", tm->tun_name, (int)(ap - tm->subifs)); + + if (! 
is_delete) + { + int sockfd = socket (AF_INET6, SOCK_STREAM, 0); + if (sockfd < 0) + clib_unix_warning ("get ifindex socket"); + + if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0) + clib_unix_warning ("get ifindex"); + + ifr6.ifr6_ifindex = ifr.ifr_ifindex; + ifr6.ifr6_prefixlen = address_length; + memcpy (&ifr6.ifr6_addr, address, 16); + + if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0) + clib_unix_warning ("set address"); + + close (sockfd); + } + else + { + int sockfd = socket (AF_INET6, SOCK_STREAM, 0); + if (sockfd < 0) + clib_unix_warning ("get ifindex socket"); + + if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0) + clib_unix_warning ("get ifindex"); + + ifr6.ifr6_ifindex = ifr.ifr_ifindex; + ifr6.ifr6_prefixlen = address_length; + memcpy (&ifr6.ifr6_addr, address, 16); + + if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0) + clib_unix_warning ("del address"); + + close (sockfd); + + mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */); + pool_put (tm->subifs, ap); + } +} + +static void +tuntap_punt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tuntap_tx (vm, node, frame); + vlib_frame_free (vm, node, frame); +} + +static void +tuntap_nopunt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + vlib_buffer_free (vm, buffers, n_packets); + vlib_frame_free (vm, node, frame); +} + +VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = { + .name = "tuntap", +}; + +static u8 * format_tuntap_interface_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + + s = format (s, "tuntap-%d", i); + return s; +} + +static uword +tuntap_intfc_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tuntap_main_t * tm = &tuntap_main; + u32 * buffers = vlib_frame_args (frame); + uword n_buffers = frame->n_vectors; + + /* Normal interface transmit happens only on the normal interface... */ + if (tm->have_normal_interface) + return tuntap_tx (vm, node, frame); + + vlib_buffer_free (vm, buffers, n_buffers); + return n_buffers; +} + +VNET_DEVICE_CLASS (tuntap_dev_class,static) = { + .name = "tuntap", + .tx_function = tuntap_intfc_tx, + .format_device_name = format_tuntap_interface_name, +}; + +static clib_error_t * +tuntap_init (vlib_main_t * vm) +{ + clib_error_t * error; + ip4_main_t * im4 = &ip4_main; + ip6_main_t * im6 = &ip6_main; + ip4_add_del_interface_address_callback_t cb4; + ip6_add_del_interface_address_callback_t cb6; + tuntap_main_t * tm = &tuntap_main; + + error = vlib_call_init_function (vm, ip4_init); + if (error) + return error; + + mhash_init (&tm->subif_mhash, sizeof (u32), sizeof(subif_address_t)); + + cb4.function = tuntap_ip4_add_del_interface_address; + cb4.function_opaque = 0; + vec_add1 (im4->add_del_interface_address_callbacks, cb4); + + cb6.function = tuntap_ip6_add_del_interface_address; + cb6.function_opaque = 0; + vec_add1 (im6->add_del_interface_address_callbacks, cb6); + + return 0; +} + +VLIB_INIT_FUNCTION (tuntap_init); diff --git a/vnet/vnet/unix/tuntap.h b/vnet/vnet/unix/tuntap.h new file mode 100644 index 00000000000..ba0b77938e8 --- /dev/null +++ b/vnet/vnet/unix/tuntap.h @@ -0,0 +1,37 @@ +/* + *------------------------------------------------------------------ + * tuntap.h - kernel stack (reverse) punt/inject path + * + * Copyright (c) 2009 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +/* + * Call from some VLIB_INIT_FUNCTION to set the Linux kernel + * inject node name. + */ +void register_tuntap_inject_node_name (char *name); + +int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, + u8 *hwaddr_arg, u32 * sw_if_indexp); +int vnet_tap_connect_renumber (vlib_main_t * vm, u8 * intfc_name, + u8 *hwaddr_arg, u32 * sw_if_indexp, + u8 renumber, u32 custom_dev_instance); + +int vnet_tap_delete(vlib_main_t *vm, u32 sw_if_index); + +int vnet_tap_modify (vlib_main_t * vm, u32 orig_sw_if_index, + u8 * intfc_name, u8 *hwaddr_arg, + u32 * sw_if_indexp, + u8 renumber, u32 custom_dev_instance); diff --git a/vnet/vnet/vcgn/cgn_bitmap.h b/vnet/vnet/vcgn/cgn_bitmap.h new file mode 100644 index 00000000000..6c46b75a608 --- /dev/null +++ b/vnet/vnet/vcgn/cgn_bitmap.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Modifications to this file + * Copyright (c) 2006-2009 by cisco Systems, Inc. + * All rights reserved. + */ + +/* + Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef __CGN_BITMAP_H__ +#define __CGN_BITMAP_H__ + +/* Bitmaps built as vectors of machine words. 
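The helpers that follow all reduce to the same two-step address calculation: a word index (bit / word-size) and a mask (1 shifted by bit % word-size). A tiny self-contained illustration of that word-array bitmap technique with a fixed-size uint64_t array:

#include <assert.h>
#include <stdint.h>

#define WORD_BITS 64
#define NWORDS    4            /* 256 bits total */

static void bm_set   (uint64_t *bm, unsigned i) { bm[i / WORD_BITS] |=  (uint64_t) 1 << (i % WORD_BITS); }
static void bm_clear (uint64_t *bm, unsigned i) { bm[i / WORD_BITS] &= ~((uint64_t) 1 << (i % WORD_BITS)); }
static int  bm_get   (const uint64_t *bm, unsigned i) { return (bm[i / WORD_BITS] >> (i % WORD_BITS)) & 1; }

int
main (void)
{
  uint64_t bm[NWORDS] = { 0 };

  bm_set (bm, 3);
  bm_set (bm, 200);
  assert (bm_get (bm, 3) && bm_get (bm, 200) && !bm_get (bm, 4));

  bm_clear (bm, 3);
  assert (!bm_get (bm, 3));
  return 0;
}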
*/ + +#include <string.h> +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/random.h> + +#define clib_bitmap_dup(v) vec_dup(v) +#define clib_bitmap_free(v) vec_free(v) +#define clib_bitmap_bytes(v) vec_bytes(v) +#define clib_bitmap_zero(v) vec_zero(v) + +/* Allocate bitmap with given number of bits. */ +#define clib_bitmap_alloc(v,n_bits) \ + v = vec_new (uword, ((n_bits) + BITS (uword) - 1) / BITS (uword)) + +/* Sets given bit. Returns old value. */ +static inline uword +cgn_clib_bitmap_set_no_check (uword * a, uword i) +{ + uword i0 = i / BITS (a[0]); + uword bit = (uword) 1 << (i % BITS (a[0])); + uword ai; + +/* ASSERT (i0 < vec_len (a)); */ + ai = a[i0]; + a[i0] = ai | bit; + + return (ai & bit) != 0; +} + +/* Clears given bit. Returns old value. */ +static inline +uword cgn_clib_bitmap_clear_no_check (uword * a, uword i) +{ + uword i0 = i / BITS (a[0]); + uword bit = (uword) 1 << (i % BITS (a[0])); + uword ai; + +/* ASSERT (i0 < vec_len (a)); */ + ai = a[i0]; + a[i0] = ai & ~bit; + + return (ai & bit) != 0; +} + +/* Gets num_bits from ai start at start. assume that all bits are + * in the same uword. + */ +static inline uword cgn_clib_bitmap_get_bits (uword *ai, u16 start, + unsigned char num_bits) +{ + uword i0 = start / BITS (ai[0]); + uword i1 = start % BITS (ai[0]); + uword result = ai[i0] >> i1; + if(num_bits >= BITS(ai[0])) return result; + /* Else, we have to trim the bits */ + result = result & (((uword)1 << num_bits) - 1); + return result; +} + +/* Check if all of the bits from start to numb_bits are avaiable */ +static inline uword cgn_clib_bitmap_check_if_all (uword *ai, u16 start, + i16 num_bits) +{ + /* Now check if any bits are zero.. if yes, return false */ + uword bitmask; + if(num_bits >= BITS(ai[0])) { + /* assume that its going to be multiples of BUTS(ai[0]) */ + uword i0 = start / BITS (ai[0]); + bitmask = ~0; /* set all bits to 1 */ + do { + if(ai[i0] ^ bitmask) return 0; + num_bits = num_bits - BITS (ai[0]); + i0++; + } while (num_bits > 0); + return 1; + } + else { + uword result = cgn_clib_bitmap_get_bits (ai, start, num_bits); + bitmask = ((uword)1 << num_bits) -1; /* set only num_bits */ + return (!(result ^ bitmask)); + } +} + +#endif diff --git a/vnet/vnet/vcgn/cgse_defs.h b/vnet/vnet/vcgn/cgse_defs.h new file mode 100644 index 00000000000..08255875fec --- /dev/null +++ b/vnet/vnet/vcgn/cgse_defs.h @@ -0,0 +1,88 @@ +/* + *------------------------------------------------------------------ + * cgse_defs.h - CGSE specific definiitions + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CGSE_DEFS_H__ +#define __CGSE_DEFS_H__ + +#include "spp_platform_common.h" +#include <cnat_cli.h> + + +#define CGSE_SVI_TYPE_CNAT 1 +#define CGSE_SVI_TYPE_XLAT 2 +#define CGSE_SVI_TYPE_NAT64_STATEFUL 3 +#define CGSE_SVI_TYPE_V6RD 4 +#define CGSE_SVI_TYPE_INFRA 5 +#define CGSE_SVI_TYPE_DS_LITE 7 +#define CGSE_SVI_TYPE_MAPE 9 + +#define CGSE_SET_TX_PKT_TYPE(type) PLATFORM_SET_CTX_RU_TX_PKT_TYPE(ctx, type) + +#define CGSE_INVALID_UIDX 0xffff /*invalid svi app uidb index */ +#define CGSE_INVALID_VRFID 0xffffffff /*invalid vrf id */ + +#define CGSE_VRF_MASK 0x3fff +#define CGSE_MAX_VRFMAP_ENTRIES (CGSE_VRF_MASK + 1) + +#define CGSE_VRFMAP_ENTRY_INVALID 0xffff + + +#define CGSE_INVALID_CGSE_ID (0) + +#define CGSE_TABLE_ENTRY_DELETED 0 +#define CGSE_TABLE_ENTRY_ACTIVE 1 +#define CGSE_TABLE_ENTRY_DORMANT 2 +#define CGSE_TABLE_ENTRY_INVALID_UIDB 3 + + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF1(level, a) \ + if (cgse_config_debug_level > level) printf(a); + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF2(level, a, b) \ + if (cgse_config_debug_level > level) printf(a, b); + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF3(level, a, b, c) \ + if (cgse_config_debug_level > level) printf(a, b, c); + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF4(level, a, b, c, d) \ + if (cgse_config_debug_level > level) printf(a, b, c, d); + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF5(level, a, b, c, d, e) \ + if (cgse_config_debug_level > level) printf(a, b, c, d, e); + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF6(level, a, b, c, d, e, f) \ + if (cgse_config_debug_level > level) printf(a, b, c, d, e, f); + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF7(level, a, b, c, d, e, f, g) \ + if (cgse_config_debug_level > level) printf(a, b, c, d, e, f, g); + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF8(level, a, b, c, d, e, f, g, h) \ + if (cgse_config_debug_level > level) printf(a, b, c, d, e, f, g, h); + +#define CGSE_CONFIG_HANDLER_DEBUG_PRINTF9(level, a, b, c, d, e, f, g, h, i) \ + if (cgse_config_debug_level > level) printf(a, b, c, d, e, f, g, h, i); + +extern u16 *cgse_uidb_index_cgse_id_mapping_ptr; + +#define CGSE_ADD_UIDB_INDEX_CGSE_ID_MAPPING(uidb_index, cgse_id) \ + *(cgse_uidb_index_cgse_id_mapping_ptr + uidb_index) = cgse_id; + +extern u8 my_instance_number; + +#endif diff --git a/vnet/vnet/vcgn/cnat_bulk_port.c b/vnet/vnet/vcgn/cnat_bulk_port.c new file mode 100644 index 00000000000..67ddd255e0f --- /dev/null +++ b/vnet/vnet/vcgn/cnat_bulk_port.c @@ -0,0 +1,964 @@ +/* + *------------------------------------------------------------------ + * cnat_bulk_ports.c - wrappers for bulk port allocation + * + * Copyright (c) 2011-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
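The bulk-port wrappers that follow cache per-subscriber "bulk" base ports in 16-bit slots: the low 15 bits hold port divided by the bulk size, and the most significant bit marks a bulk whose ports are all in use. A small illustration of that encoding, assuming a bulk size of 64 ports:

#include <assert.h>
#include <stdint.h>

#define BULK_SIZE 64              /* assumed ports per bulk */
#define FULL_FLAG 0x8000u         /* MSB: every port in this bulk is allocated */

static uint16_t port_to_cache (uint16_t port) { return port / BULK_SIZE; }
static uint16_t cache_to_port (uint16_t slot) { return (slot & 0x7fffu) * BULK_SIZE; }

int
main (void)
{
  uint16_t slot = port_to_cache (4160);     /* base port 4160 -> bulk index 65 */
  assert (cache_to_port (slot) == 4160);

  slot |= FULL_FLAG;                        /* mark the bulk as exhausted */
  assert (slot & FULL_FLAG);
  assert (cache_to_port (slot) == 4160);    /* base port still recoverable */
  return 0;
}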
+ *------------------------------------------------------------------ + */ + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/bitmap.h> + +#include "cnat_db.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_logging.h" +#include "spp_timers.h" +#include "platform_common.h" +#include "cgn_bitmap.h" +#include "spp_platform_trace_log.h" +#include "cnat_ports.h" + +#ifndef NO_BULK_LOGGING + +#define PORT_TO_CACHE(y, z) ((y)/(z)) +/* The last bit (MSB) is used to indicate whether the cache entry is full */ +#define CACHE_TO_PORT(x, z) (((x)& 0x7FFF) * (z)) +#define IS_CACHE_ENTRY_FULL(x) ((x) & 0x8000) +#define MARK_CACHE_ENTRY_AS_FULL(x) ((x) = ((x) | 0x8000)) +#define UNMARK_CACHE_ENTRY_AS_FULL(x) ((x) = ((x) & 0x7FFF)) +#define CACHE_ENTRY_WITHOUT_FULL_STAT(x) ((x) & 0x7FFF) + + +#define NUM_BULK_CHECK 128 /* max number of previous chache to check. + * somewhat orbirtrary.. assume 64 as bulk size.. can handle up + * to 128*64 ports allocated by a single subscriber */ + +/* #define DEBUG_BULK_PORT 1 */ +/* #define DEBUG_BULK_PORT_DETAIL 1 */ +#define HAVE_BULK_PORT_STATS 1 + +#ifdef HAVE_BULK_PORT_STATS +static uword bulk_cache_hit_count; +static uword bulk_port_use_count; +static uword bulk_port_alloc_count; +static uword mapped_port_alloc_count; +#endif /* HAVE_BULK_PORT_STATS */ + +static u32 bulk_port_rand_across; + +void show_bulk_port_allocation(u16 in_vrfid, u32 inside_ip) +{ + cnat_db_key_bucket_t u_ki; + cnat_user_db_entry_t *udb; + int i; + u32 head; + cnat_main_db_entry_t *db = NULL; + i16 printed_so_far = 0; /* entries printed so far */ + u16 prev_bulks[NUM_BULK_CHECK]; + cnat_vrfmap_t *my_vrfmap = 0; + cnat_vrfmap_t *vrfmap = 0; + bulk_alloc_size_t bulk_size; + + u_ki.k.k.vrf = in_vrfid; + u_ki.k.k.ipv4 = inside_ip; + u_ki.k.k.port = 0; + + PLATFORM_DEBUG_PRINT("Searching for user %x in invrf %d\n", + inside_ip, in_vrfid); + udb = cnat_user_db_lookup_entry(&u_ki); + if(!udb) { + PLATFORM_DEBUG_PRINT("No such user\n"); return; + } + + pool_foreach (vrfmap, cnat_map_by_vrf, ({ + if(vrfmap->i_vrf == in_vrfid) { + my_vrfmap = vrfmap; + break; + }})); + + if(!my_vrfmap) { + PLATFORM_DEBUG_PRINT("Vrf map not found\n"); + return; + } + bulk_size = BULKSIZE_FROM_VRFMAP(my_vrfmap); + + if(bulk_size == BULK_ALLOC_SIZE_NONE) { + PLATFORM_DEBUG_PRINT("Bulk allocation not enabled\n"); + return; + } + + PLATFORM_DEBUG_PRINT("\nBulk cache for subscriber 0x%x: ", inside_ip); + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + PLATFORM_DEBUG_PRINT("%d , ", + CACHE_TO_PORT(udb->bulk_port_range_cache[i], bulk_size)); + } + PLATFORM_DEBUG_PRINT("\nNon cached bulk allocation for subscriber 0x%x:\n", + inside_ip); + ASSERT(udb); + memset(prev_bulks, 0,sizeof(prev_bulks)); + + head = udb->translation_list_head_index; + if(PREDICT_FALSE(head == EMPTY)) { + return; + } + db = cnat_main_db + head; + while (1) { + /* skip static ports - static ports may not belong to bulk pool*/ + if(db->out2in_key.k.port < cnat_static_port_range) goto next_entry; + + u16 bm_index = PORT_TO_CACHE(db->out2in_key.k.port, bulk_size); + + /*Check if we have already tested this bulk */ + for(i=0; i < printed_so_far; i++) { + if(prev_bulks[i] == bm_index) goto next_entry; + } + + /*Check if this base port is already part of cache */ + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(CACHE_ENTRY_WITHOUT_FULL_STAT(udb->bulk_port_range_cache[i]) + == bm_index) { 
+ goto next_entry; + } + } + /* this is not in chache already */ + PLATFORM_DEBUG_PRINT("%d ", CACHE_TO_PORT(bm_index, bulk_size)); + if(printed_so_far < NUM_BULK_CHECK) { + prev_bulks[printed_so_far] = bm_index; + printed_so_far++; + } + +next_entry: + db = cnat_main_db + db->user_ports.next; + /* + * its a circular list, so if we have reached the head again + * all the entries for that user have been read + */ + if (db == (cnat_main_db + head)) { + break; + } + } /* while loop for db entries */ + + PLATFORM_DEBUG_PRINT("\n"); + return; +} + +void show_bulk_port_stats() +{ + + cnat_vrfmap_t *my_vrfmap = 0; + PLATFORM_DEBUG_PRINT("Bulk size settings of each inside vrf ...\n"); + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + PLATFORM_DEBUG_PRINT("vrf id %d, bulk size %d\n", my_vrfmap->i_vrf, + BULKSIZE_FROM_VRFMAP(my_vrfmap)); + })); + +#ifdef HAVE_BULK_PORT_STATS + PLATFORM_DEBUG_PRINT("\nBulk port allocation, use and cache hit statistics\n"); + PLATFORM_DEBUG_PRINT("Number of times bulk ports allocated %lld\n", + bulk_port_alloc_count); + PLATFORM_DEBUG_PRINT("Number of times pre-allocated ports used %lld\n", + bulk_port_use_count); + PLATFORM_DEBUG_PRINT( + "Number of times pre-allocated bulk port found from cache %lld\n", + bulk_cache_hit_count); + PLATFORM_DEBUG_PRINT( + "Number of times mapped port (static) allocations made %lld\n", + mapped_port_alloc_count); +#else + PLATFORM_DEBUG_PRINT("\nNat44 bulk port statistics not turned on\n"); +#endif /* HAVE_BULK_PORT_STATS */ +} + +void clear_bulk_port_stats() +{ +#ifdef HAVE_BULK_PORT_STATS + bulk_port_alloc_count = 0; + bulk_port_use_count = 0; + bulk_cache_hit_count = 0; + mapped_port_alloc_count = 0; +#endif /* HAVE_BULK_PORT_STATS */ + return; +} + +void cnat_update_bulk_range_cache(cnat_user_db_entry_t *udb, u16 o_port, + bulk_alloc_size_t bulk_size) +{ + i16 i; + if(!udb) { +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT("%s, null udb!\n", __func__); +#endif + return; + } + if(BULK_ALLOC_SIZE_NONE == bulk_size) { /* no bulk logging */ + return; + } + + /* Take care of caching */ + if(o_port & 0x1) { + o_port--; + } + if(PREDICT_FALSE(o_port <= 0)) { +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT("%s invalid port: %d\n", __func__, o_port); +#endif + return; + } + + /* First preference is for the cache entry's that are not used yet */ + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(PREDICT_FALSE( + udb->bulk_port_range_cache[i] == (i16)BULK_RANGE_INVALID)) { + udb->bulk_port_range_cache[i] = PORT_TO_CACHE(o_port, bulk_size); + return; + } + } + + /* Now check if any cache entry is full and if it can be replaced */ + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(PREDICT_FALSE(IS_CACHE_ENTRY_FULL(udb->bulk_port_range_cache[i]))) { + udb->bulk_port_range_cache[i] = PORT_TO_CACHE(o_port, bulk_size); + return; + } + } + + return; +} + + +void cnat_port_free_v2_bulk ( + cnat_portmap_v2_t *pm, + int index, + port_pair_t ptype, + u16 base_port, + cnat_user_db_entry_t *udb, + u16 static_port_range, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req) +{ + cnat_portmap_v2_t *my_pm; + i16 bm_index; + i16 i; + int unmark_full_status = 0; + + *nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; + + /* First free up the port */ + cnat_port_free_v2(pm, index, ptype, base_port, static_port_range); + if(BULK_ALLOC_SIZE_NONE == bulk_size) /* no bulk logging */ + return; + if(PREDICT_FALSE(!udb)) { +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT("%s udb is null\n", __func__); +#endif + } + + if(PREDICT_FALSE(base_port < static_port_range)) { + return; + } + /* 
Now check if cache needs to be removed */ + my_pm = pm + index; + base_port = base_port/bulk_size; + base_port = base_port * bulk_size; /*Align it to multiples of bulk_size */ + if(PREDICT_TRUE(!cgn_clib_bitmap_check_if_all( + my_pm->bm, base_port, bulk_size))) { + *nfv9_log_req = CACHE_ALLOC_NO_LOG_REQUIRED; + unmark_full_status = 1; + /* One or more ports are still in use */ + } else { + *nfv9_log_req = base_port; /* logging required now. indicate base port*/ + } + bm_index = PORT_TO_CACHE(base_port, bulk_size); + /* Now check if this is in the cache */ + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(PREDICT_FALSE( + CACHE_ENTRY_WITHOUT_FULL_STAT(udb->bulk_port_range_cache[i])) + == bm_index) { + if(unmark_full_status) { + /* Unmark full stat.. if it was marked so..*/ + UNMARK_CACHE_ENTRY_AS_FULL(udb->bulk_port_range_cache[i]); + } else { + udb->bulk_port_range_cache[i] = (i16)BULK_RANGE_INVALID; +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT( + "Clearing cache for client 0x%x, bulk port %d\n", + my_pm->ipv4_address, base_port); +#endif + } + break; + } + } + return; +} + + +/* Get suitable port from range */ +static i16 get_suiting_port_pos_from_range(cnat_portmap_v2_t *my_pm, + u16 bulk_start, i16 bulk_size, port_pair_t pair_type) +{ + i16 num_pos, num_bits, iterations; + uword bulk_ports; + i16 inc; + i16 num_uwords = bulk_size/BITS(my_pm->bm[0]); + + if(PREDICT_FALSE(!num_uwords)) { + iterations = 0; + num_bits = bulk_size; + bulk_size = 0; + } else { + bulk_port_rand_across = randq1(bulk_port_rand_across); + iterations = bulk_port_rand_across % num_uwords; + num_bits = BITS(my_pm->bm[0]); + } + + do { + bulk_ports = cgn_clib_bitmap_get_bits(my_pm->bm, + (bulk_start + iterations * BITS(my_pm->bm[0])), num_bits); +#ifdef DEBUG_BULK_PORT_DETAIL + PLATFORM_DEBUG_PRINT("%s %d, bulk start %d, num_bits %d, ports %lld \n", + __func__, __LINE__, bulk_start, num_bits, bulk_ports); +#endif /* DEBUG_BULK_PORT_DETAIL */ + if(PREDICT_FALSE(!bulk_ports)) goto next_uword; + if(PREDICT_TRUE((pair_type == PORT_SINGLE) + || (pair_type == PORT_PAIR))) { + num_pos =0; + inc = 1; + } else if(pair_type == PORT_S_ODD) { + num_pos = 1; + inc = 2; + } else if(pair_type == PORT_S_EVEN) { + num_pos =0; + inc = 2; + } + + for(; num_pos < num_bits; num_pos = num_pos + inc) { + if(!((bulk_ports >> num_pos) & 1)) + continue; /* In use */ + /* Check if the available port meets our + * criteria such as add, even, pair etc */ + else if(PREDICT_FALSE( + (pair_type == PORT_PAIR) && ((num_pos & 0x1) || + (!((bulk_ports >> (num_pos + 1)) & 1))))) + continue; + else break; /* Found one that meets the criteria */ + } + if(num_pos < num_bits) + return (num_pos + iterations * BITS(my_pm->bm[0])); +next_uword: + num_bits = BITS(my_pm->bm[0]); + bulk_size -= BITS(my_pm->bm[0]); + iterations++; + if(iterations >= num_uwords) iterations = 0; + } while (bulk_size > 0); + + return -2; /* nothing found */ +} + +static cnat_errno_t try_bulk_port_from_non_cache( + cnat_user_db_entry_t *udb, + cnat_portmap_v2_t *my_pm, + port_pair_t pair_type, + bulk_alloc_size_t bulk_size, + u16 *port_available, + u16 static_port_range + ) +{ + /**** + 1. user should have existing translations.. otherwise, we wouldn't get here. + 2. For each, get the outside port. get the base port. + check if it is already in cache + 3. if not, we stand chance. + 4. Check for availability from this non cached pool. + 5. if found, repalce this with one of the cache that is invalid or full?? + 6. if we are replacing the cache.. 
it has to be governed by user + preference on prefer oldest pool or prefer newest pool + ********/ + u32 head; + cnat_main_db_entry_t *db = NULL; + u16 bulk_start; /* start point in 64 bitmap array to search for port */ + i16 port_pos; /* indicates the position of available port in bulk */ + i16 i; /* just a counter */ + i16 attempts_so_far = 0; /* (futile-;) attemps so far..*/ + u16 prev_bulks[NUM_BULK_CHECK]; + ASSERT(udb); + memset(prev_bulks, 0,sizeof(prev_bulks)); + + head = udb->translation_list_head_index; + if(PREDICT_FALSE(head == EMPTY)) return CNAT_NO_PRE_ALLOCATED_BULK_PORTS; + + db = cnat_main_db + head; + while (1) { //what should be the limit?? + + /* skip static ports - static ports may not belong to bulk pool*/ + if(db->out2in_key.k.port < static_port_range) goto next_entry; + + u16 bm_index = PORT_TO_CACHE(db->out2in_key.k.port, bulk_size); + + /*Check if we have already tested this bulk */ + for(i=0; i < attempts_so_far; i++) { + if(prev_bulks[i] == bm_index) { + goto next_entry; + } + } + + /*Check if this base port is already part of cache */ + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(CACHE_ENTRY_WITHOUT_FULL_STAT(udb->bulk_port_range_cache[i]) + == bm_index) + goto next_entry; + } + + /* this is not in chache already */ + bulk_start = CACHE_TO_PORT(bm_index, bulk_size); + port_pos = get_suiting_port_pos_from_range(my_pm, + bulk_start, bulk_size, pair_type); + + if(port_pos < 0) { /* no port available in this range */ + /* Mark this bulk so that we don't have to try this again */ + if(attempts_so_far < NUM_BULK_CHECK) { + prev_bulks[attempts_so_far] = bm_index; + attempts_so_far++; + } + goto next_entry; + } + + /* Got one...Get the port number */ + *port_available = bulk_start + port_pos; + + /* Check to see if we shoud replace one of the cache */ + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(PREDICT_FALSE((udb->bulk_port_range_cache[i] + == (i16)BULK_RANGE_INVALID) || ( + IS_CACHE_ENTRY_FULL(udb->bulk_port_range_cache[i])))) { + udb->bulk_port_range_cache[i] = bm_index; + return CNAT_SUCCESS; + } + } + /* Check to replace an existing (in use) entry */ + /* TODO: enforce policy */ + /* order of looping should depend on policy */ + + return CNAT_SUCCESS; + +next_entry: + db = cnat_main_db + db->user_ports.next; + /* + * its a circular list, so if we have reached the head again + * all the entries for that user have been read + */ + if (db == (cnat_main_db + head)) { + break; + } + } /* while loop for db entries */ + /* no ports available from pre allocated bulk pool */ + return CNAT_NO_PORT_FROM_BULK; +} + +cnat_errno_t +cnat_dynamic_port_alloc_v2_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range, + cnat_user_db_entry_t *udb, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req, + u16 ip_n_to_1, + u32 *rseed_ip + ) +{ + + cnat_errno_t rv; + u16 port_available = 0; + i16 i; + cnat_portmap_v2_t *my_pm; + + if((BULK_ALLOC_SIZE_NONE != bulk_size) /* bulk logging enabled */ + && (udb)) { /* This user does have translations already */ + u16 bulk_start; + i16 port_pos; + + my_pm = pm + *index; + /* We have a case to check if bulk allocated ports can be used */ + /* TODO: order of looping to be based on policy + * like prefer older or prefer newer ?? 
+ * For now, start with most recent cache entry + * so that we stand a better chance of + * finding a port + */ + for(i= 0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(PREDICT_TRUE((udb->bulk_port_range_cache[i] == + (i16)BULK_RANGE_INVALID) || + IS_CACHE_ENTRY_FULL(udb->bulk_port_range_cache[i]))) { + continue; /* This range is not initialized yet or it is full */ + } + bulk_start = CACHE_TO_PORT(udb->bulk_port_range_cache[i], + bulk_size); + port_pos = get_suiting_port_pos_from_range(my_pm, + bulk_start, bulk_size, pair_type); + if(PREDICT_FALSE(port_pos < 0)) { + /* Mark this cache entry as full so that we do not + * waste time on this entry again */ + MARK_CACHE_ENTRY_AS_FULL(udb->bulk_port_range_cache[i]); +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT("Marked bulk cache entry %d as full for %x \n", + i, my_pm->ipv4_address); +#endif /* #ifdef DEBUG_BULK_PORT */ + continue; + } + /* Get the port number */ + port_available = bulk_start+ port_pos; +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT( + "Found port from cache : IP 0x%x, port %d %d iterations\n", + my_pm->ipv4_address, port_available, i) +#endif +#ifdef HAVE_BULK_PORT_STATS + bulk_cache_hit_count++; +#endif /* HAVE_BULK_PORT_STATS */ + break; + } /* end of for loop for cache check */ + /* If we have not found a port yet, check if we can have + * pre allocated bulk port from non-cache */ + if(PREDICT_FALSE(i == BULK_RANGE_CACHE_SIZE)) { + if( try_bulk_port_from_non_cache(udb, my_pm, pair_type, + bulk_size, &port_available, + static_port_range) != CNAT_SUCCESS ) { + goto ALLCOATE_NEW_BULK; + } +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT("Found port from non-cache : IP 0x%x, port %d\n", + my_pm->ipv4_address, port_available); +#endif + } + /* Assign the port, mark it as in use */ + cgn_clib_bitmap_clear_no_check(my_pm->bm, port_available); + (my_pm->inuse)++; + if(PREDICT_FALSE(pair_type == PORT_PAIR)) {/* Mark the next one too */ + cgn_clib_bitmap_clear_no_check(my_pm->bm, port_available + 1); + (my_pm->inuse)++; + } + *o_ipv4_address = my_pm->ipv4_address; + *o_port = port_available; + *nfv9_log_req = CACHE_ALLOC_NO_LOG_REQUIRED; +#ifdef HAVE_BULK_PORT_STATS + bulk_port_use_count++; +#endif /* HAVE_BULK_PORT_STATS */ + return (CNAT_SUCCESS); + } +ALLCOATE_NEW_BULK: +#ifdef DEBUG_BULK_PORT + if(BULK_ALLOC_SIZE_NONE != bulk_size) { + PLATFORM_DEBUG_PRINT( + "No port available from bulk cache, bulk size %d\n", bulk_size); + } +#endif + /* For whatever reason, we have not got a port yet */ + rv = cnat_dynamic_port_alloc_v2(pm, atype, pair_type, index, + o_ipv4_address, o_port, static_port_range, bulk_size, nfv9_log_req, + ip_n_to_1, rseed_ip); + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + return rv; + } + /* Take care of caching */ + if(PREDICT_FALSE(udb != NULL)) { + /* Predict false because, we usually allocate for new users */ + cnat_update_bulk_range_cache(udb, *o_port, bulk_size); + } +#ifdef HAVE_BULK_PORT_STATS + bulk_port_alloc_count++; +#endif /* HAVE_BULK_PORT_STATS */ + return (CNAT_SUCCESS); +} + + +cnat_errno_t +cnat_static_port_alloc_v2_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 i_ipv4_address, + u16 i_port, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range, + cnat_user_db_entry_t *udb, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req, + u16 ip_n_to_1 + ) +{ + + /*** + * Requirements - + * 1. If the port allocated is below dyn start, it should be individual + * port (not bulk) + * 2. If NOT, it should be bulk allocated + * 3. 
Try and keep the inside port same as outside port in both the + * cases (best effort) + + * Algorithm + * 1. Check if it is below stat port start or user is new or bulk is + * disabled. If yes, call existing function + * 2. If not, see if we can pick from bulk and yet try to keep the port + * same - difficult thing - check if the port is free - then check if the + * entire bulk is free - if not check if bulk is owned by the user already. + * If all of these fail, call existing function to allocate a new bulk + * 3. Update cache, etc return log requirements + *****/ + + cnat_errno_t rv; + i16 i; + u32 head; + cnat_portmap_v2_t *my_pm; + uword bit_test_result, start_bit; + cnat_main_db_entry_t *db = NULL; + + if((BULK_ALLOC_SIZE_NONE != bulk_size) /* bulk logging enabled */ + && (udb) && /* This user does have translations already */ + i_port >= static_port_range ) { /* It is outside stat port range*/ + + my_pm = pm + *index; + /* We have a case to check if bulk allocated ports can be used */ + + /* First check if the required port is available. */ + if(PREDICT_FALSE(clib_bitmap_get_no_check(my_pm->bm, i_port) == 0)) { + goto ALLOCATE_NEW_BULK_STATIC; + } + + /* Port is free.. check if the bulk is also free */ + start_bit= ((i_port/bulk_size) * bulk_size); + bit_test_result = cgn_clib_bitmap_check_if_all(my_pm->bm, + start_bit, bulk_size); + if(PREDICT_TRUE(bit_test_result)) { /* bulk is available, grab it */ + goto ALLOCATE_NEW_BULK_STATIC; + } + + /* else, bulk is taken by someone. check if it is me */ + /* Check if we own the bulk by any chance */ + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(udb->bulk_port_range_cache[i] == start_bit) break; + } + if(i == BULK_RANGE_CACHE_SIZE) { /* no luck with cache */ + head = udb->translation_list_head_index; + if(PREDICT_FALSE(head == EMPTY)) + goto ALLOCATE_NEW_BULK_STATIC; + db = cnat_main_db + head; + i = 0; + while(1) { + if((db->out2in_key.k.port/bulk_size) * bulk_size == start_bit) { + i = 1; /* Just to indicate it is found */ + break; + } + db = cnat_main_db + db->user_ports.next; + /* + * its a circular list, so if we have reached the head again + * all the entries for that user have been read + */ + if (db == (cnat_main_db + head)) break; + } /* while loop for db entries */ + if(!i) { + goto ALLOCATE_NEW_BULK_STATIC; + } + } + /* Assign the port, mark it as in use */ + cgn_clib_bitmap_clear_no_check(my_pm->bm, i_port); + (my_pm->inuse)++; + *o_ipv4_address = my_pm->ipv4_address; + *o_port = i_port; + *nfv9_log_req = CACHE_ALLOC_NO_LOG_REQUIRED; +#ifdef HAVE_BULK_PORT_STATS + bulk_port_use_count++; +#endif /* HAVE_BULK_PORT_STATS */ + +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT("%s, %d, found stat port from bulk: %x, %d\n", + __func__, + __LINE__, *o_ipv4_address, *o_port); +#endif /* DEBUG_BULK_PORT */ + return (CNAT_SUCCESS); + } + +ALLOCATE_NEW_BULK_STATIC: +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT("%s No port available from bulk cache, bulk size %d\n", + __func__,bulk_size); +#endif + /* For whatever reason, we have not got a port yet */ + rv = cnat_static_port_alloc_v2(pm, atype, pair_type, i_ipv4_address, + i_port, index, o_ipv4_address, o_port, static_port_range, + bulk_size, nfv9_log_req,ip_n_to_1); + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + return rv; + } + /* Take care of caching only if it was a bulk alloc */ + if(PREDICT_FALSE(udb && (BULK_ALLOC_NOT_ATTEMPTED != *nfv9_log_req))) { + cnat_update_bulk_range_cache(udb, *o_port, bulk_size); + } +#ifdef HAVE_BULK_PORT_STATS + bulk_port_alloc_count++; +#endif /* 
HAVE_BULK_PORT_STATS */ + return (CNAT_SUCCESS); + +} + +cnat_errno_t +cnat_mapped_static_port_alloc_v2_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + u32 *index, + u32 ipv4_address, + u16 port, + cnat_user_db_entry_t *udb, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req, + u16 ip_n_to_1 + ) +{ + /* Requirements : + * 1. Check if bulk allocation is required. + * 2. Call cnat_mapped_static_port_alloc_v2 to allocate + * 3. Decide if alloc has to be cached + * 4. Update nfv9_log_req + */ + cnat_errno_t rv; + rv = cnat_mapped_static_port_alloc_v2 (pm, + atype, index, ipv4_address, port, nfv9_log_req, bulk_size, ip_n_to_1); + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + return rv; + } + /* Take care of caching only if it was a bulk alloc */ + if(PREDICT_FALSE(udb && (BULK_ALLOC_NOT_ATTEMPTED != *nfv9_log_req))) { + int i; + port = port*bulk_size; + port = port/bulk_size; /* align it to bulk size boundary */ + for(i=0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(CACHE_ENTRY_WITHOUT_FULL_STAT(udb->bulk_port_range_cache[i]) + == PORT_TO_CACHE(port, bulk_size)) + break; + } + if( i == BULK_RANGE_CACHE_SIZE) { /* else, it is alredy in cache */ + cnat_update_bulk_range_cache(udb, port, bulk_size); + } + } +#ifdef HAVE_BULK_PORT_STATS + mapped_port_alloc_count++; +#endif /* HAVE_BULK_PORT_STATS */ + return (CNAT_SUCCESS); +} + + +cnat_errno_t +cnat_dynamic_port_alloc_rtsp_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u16 i_port, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range, + cnat_user_db_entry_t *udb, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req, + u32 *rseed_ip) +{ + + /*** + * Algorithm + * 1. Compute the range of ports required based on the number of digits + * in the port request made by the client. + * 2. Check if bulk logging is enabled. If not, use the existing method. + * 3. Check if there are 2 adjacent ports available that meet the above + * criteria in any of the bulk allocations made already. + * 4. If yes, mark them in use and return. + * 5. If not allocate a new bulk and pick 2 ports in it + ***/ + + i16 i; + cnat_portmap_v2_t *my_pm = 0; + u32 start_port1, end_port1, start_port2, end_port2; + int range_loop; + u16 bulk_start; + i16 port_pos; + u16 port_available = 0; + + ASSERT(index); + ASSERT(o_ipv4_address); + ASSERT(o_port); + + /* + * Check if the port is 4 digit or 5 digit. I am assuming we are + * not getting 3 (or 2 or 1) digit ports, which we cannot anyway + * allocate same sized outside ports - as outside ports start from 1024 + * + * Static Port has its own reserved range. Ensure that the range is + * such that atleast few 4 digit ports are available for RTSP. If + * not it does not make sense to do special allocation for RTSP. 
+ */ + if (PREDICT_TRUE(static_port_range < MIN_STATIC_PORT_RANGE_FOR_RTSP)) { + /* + * 4 digit port or less + */ + if (i_port <= 9999) { + start_port1 = static_port_range; + end_port1 = 9999; + + start_port2 = 10000; + end_port2 = PORTS_PER_ADDR - 1; + } else { /* 5 digit port */ + start_port1 = 10000; + end_port1 = PORTS_PER_ADDR - 1; + + start_port2 = static_port_range; + end_port2 = 9999; + } + } else { /* Static port range is too big */ + start_port1 = static_port_range; + end_port1 = PORTS_PER_ADDR - 1; + + /* + * PORTS_PER_ADDR is just a placeholder for + * INVALID_PORT, valid ports are b/w 1 and PORTS_PER_ADDR + */ + start_port2 = PORTS_PER_ADDR; + end_port2 = PORTS_PER_ADDR; + } + + + if(PREDICT_TRUE(udb != NULL)) { + my_pm = pm + *index; + } + + /* Now check if this user already owns a bulk range that is + * within start range 1 + */ + + u32 start_range = start_port1; + u32 end_range = end_port1; + for(range_loop = 0; range_loop < 2; range_loop++) { + if((BULK_ALLOC_SIZE_NONE == bulk_size) || (!udb)) { + goto ALLOCATE_NEW_RTSP_PORTS; + } + for(i= 0; i < BULK_RANGE_CACHE_SIZE; i++) { + if(PREDICT_TRUE((udb->bulk_port_range_cache[i] == + (i16)BULK_RANGE_INVALID) || + IS_CACHE_ENTRY_FULL(udb->bulk_port_range_cache[i]))) { + continue; /* This range is not initialized yet or it is full */ + } + + bulk_start = CACHE_TO_PORT(udb->bulk_port_range_cache[i], + bulk_size); + if(bulk_start < start_port1 || bulk_start >= end_port1) { + continue; /* Not in the range */ + } + + port_pos = get_suiting_port_pos_from_range(my_pm, + bulk_start, bulk_size, pair_type); + if(PREDICT_FALSE(port_pos < 0)) { + /* Not Marking this cache entry as full as it failed + * for pair type. It might have individual entries + */ + continue; + } + /* Get the port number */ + port_available = bulk_start+ port_pos; +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT( + "Found port from cache : IP 0x%x, port %d %d iterations\n", + my_pm->ipv4_address, port_available, i) +#endif +#ifdef HAVE_BULK_PORT_STATS + bulk_cache_hit_count += 2; +#endif /* HAVE_BULK_PORT_STATS */ + break; + } /* end of for loop for cache check */ + + if(PREDICT_FALSE(i == BULK_RANGE_CACHE_SIZE)) { + /* we have not found a port yet, but to do not want to try + * non-cache bulks.. because, it is a very low probability and + * do not want to tweak that code for this special case + * The impact of non checking the non-cache is, we give this + * user few extra ports .. which is OK + */ + goto ALLOCATE_NEW_RTSP_PORTS; + } +#ifdef DEBUG_BULK_PORT + PLATFORM_DEBUG_PRINT("RTSP: Found port from non-cache : IP 0x%x, port %d\n", + my_pm->ipv4_address, port_available); +#endif + + /* Assign the port, mark it as in use */ + cgn_clib_bitmap_clear_no_check(my_pm->bm, port_available); + (my_pm->inuse)++; + cgn_clib_bitmap_clear_no_check(my_pm->bm, port_available + 1); + (my_pm->inuse)++; + + *o_ipv4_address = my_pm->ipv4_address; + *o_port = port_available; + *nfv9_log_req = CACHE_ALLOC_NO_LOG_REQUIRED; +#ifdef HAVE_BULK_PORT_STATS + bulk_port_use_count += 2; +#endif /* HAVE_BULK_PORT_STATS */ + return (CNAT_SUCCESS); + +ALLOCATE_NEW_RTSP_PORTS: + /* No luck. Let's try allocating new bulk.. 
*/ + if(PREDICT_TRUE(CNAT_SUCCESS == cnat_dynamic_port_alloc_rtsp + (pm, atype, pair_type, + start_range, end_range,index, o_ipv4_address, + o_port, bulk_size, nfv9_log_req,rseed_ip))) { + if(PREDICT_FALSE(udb && + (BULK_ALLOC_NOT_ATTEMPTED != *nfv9_log_req))) { + cnat_update_bulk_range_cache(udb, *o_port, bulk_size); + } +#ifdef HAVE_BULK_PORT_STATS + bulk_port_alloc_count++; +#endif /* HAVE_BULK_PORT_STATS */ + return CNAT_SUCCESS; + } + + /* Could not allocate in range 1.. so move to range 2. */ + start_range = start_port2; + end_range = end_port2; + + } + + return (CNAT_NOT_FOUND_DIRECT); /* if we are here, we could not get any ports */ + +} + +#else /* Dummy definitions */ +void show_bulk_port_stats() +{ + PLATFORM_DEBUG_PRINT("\nBulk logging feature not included\n"); +} + + void clear_bulk_port_stats() +{ + PLATFORM_DEBUG_PRINT("\nBulk logging feature not included\n"); +} +#endif /* NO_BULK_LOGGING */ diff --git a/vnet/vnet/vcgn/cnat_bulk_port.h b/vnet/vnet/vcgn/cnat_bulk_port.h new file mode 100644 index 00000000000..3e48b9a7794 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_bulk_port.h @@ -0,0 +1,157 @@ +/* + *------------------------------------------------------------------ + * cnat_bulk_port_defs.h bulk port alloc definitions + * + * Copyright (c) 2011-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_BULK_PORT_H__ +#define __CNAT_BULK_PORT_H__ + +#ifndef NO_BULK_LOGGING +#include "cnat_bulk_port_defs.h" + +cnat_errno_t +cnat_dynamic_port_alloc_v2_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range, + cnat_user_db_entry_t *udb, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req, + u16 ip_n_to_1, + u32 *rseed_ip); + +void cnat_update_bulk_range_cache(cnat_user_db_entry_t *udb, u16 o_port, + bulk_alloc_size_t bulk_size); + +void cnat_port_free_v2_bulk ( + cnat_portmap_v2_t *pm, + int index, + port_pair_t ptype, + u16 base_port, + cnat_user_db_entry_t *udb, + u16 static_port_range, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req); + +cnat_errno_t cnat_static_port_alloc_v2_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 i_ipv4_address, + u16 i_port, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range, + cnat_user_db_entry_t *udb, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req, + u16 ip_n_to_1 + ); + +cnat_errno_t cnat_dynamic_port_alloc_rtsp_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u16 i_port, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range, + cnat_user_db_entry_t *udb, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req, + u32 *rseed_ip); + +cnat_errno_t +cnat_mapped_static_port_alloc_v2_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + u32 *index, + u32 ipv4_address, + u16 port, + cnat_user_db_entry_t *udb, + bulk_alloc_size_t bulk_size, + int *nfv9_log_req, + u16 ip_n_to_1 + ); + +#else /* NO_BULK_LOGGING */ +/* use older code */ +inline cnat_errno_t +cnat_dynamic_port_alloc_v2_bulk ( + cnat_vrfmap_t *vrf_map, + port_alloc_t atype, + port_pair_t pair_type, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range, + u16 ip_n_to_1, + u32 *rseed_ip + ) +{ + return cnat_dynamic_port_alloc_v2(vrf_map->portmap_list, atype, + pair_type, index, o_ipv4_address, o_port, static_port_range, + ip_n_to_1, rseed_ip); +} + +inline void cnat_port_free_v2_bulk ( + cnat_portmap_v2_t *pm, + int index, + port_pair_t ptype, + u16 base_port, + cnat_user_db_entry_t *udb, + u16 static_port_range); +{ + return cnat_port_free_v2(pm, index, ptype, base_port, + static_port_range); +} + +inline cnat_errno_t cnat_static_port_alloc_v2_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 i_ipv4_address, + u16 i_port, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range) +{ + return cnat_static_port_alloc_v2 (pm, atype, pair_type, + i_ipv4_address, i_port, index, o_ipv4_address, o_port); +} + +inline cnat_errno_t +cnat_mapped_static_port_alloc_v2_bulk ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + u32 *index, + u32 ipv4_address, + u16 port) +{ + return cnat_mapped_static_port_alloc_v2(pm, atype, index + ipv4_address, port); +} + +#endif /* NO_BULK_LOGGING */ +#endif /* __CNAT_BULK_PORT_H__ */ diff --git a/vnet/vnet/vcgn/cnat_bulk_port_defs.h b/vnet/vnet/vcgn/cnat_bulk_port_defs.h new file mode 100644 index 00000000000..edb47b0a8e1 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_bulk_port_defs.h @@ -0,0 +1,57 @@ +/* + *------------------------------------------------------------------ + * cnat_bulk_port_defs.h bulk port alloc definitions + * + * Copyright (c) 2011 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __CNAT_BULK_PORT_DEFS_H__ +#define __CNAT_BULK_PORT_DEFS_H__ + + +#ifndef NO_BULK_LOGGING + +typedef enum { + BULK_ALLOC_SIZE_NONE = 1, + BULK_ALLOC_SIZE_16 = 16, + BULK_ALLOC_SIZE_32 = 32, + BULK_ALLOC_SIZE_64 = 64, + BULK_ALLOC_SIZE_128 = 128, + BULK_ALLOC_SIZE_256 = 256, + BULK_ALLOC_SIZE_512 = 512, + BULK_ALLOC_SIZE_1024 = 1024, + BULK_ALLOC_SIZE_2048 = 2048, + BULK_ALLOC_SIZE_4096 = 4096 +} bulk_alloc_size_t; + +/* #define DEBUG_BULK_PORT 1 TODO: remove this later */ + +#define CACHE_ALLOC_NO_LOG_REQUIRED -1 +#define BULK_ALLOC_NOT_ATTEMPTED -2 + +#define BULK_RANGE_INVALID 0xFFFF +#define BULK_RANGE_CACHE_SIZE 4 + +#define BULKSIZE_FROM_VRFMAP(vrfmap) ((vrfmap)->bulk_size) + +#define INIT_BULK_CACHE(udb) \ + { \ + int i; \ + for(i =0; i < BULK_RANGE_CACHE_SIZE; i++) \ + (udb)->bulk_port_range_cache[i] = (i16)BULK_RANGE_INVALID; \ + } + +#endif /* NO_BULK_LOGGING */ +#endif /* __CNAT_BULK_PORT_DEFS_H__ */ diff --git a/vnet/vnet/vcgn/cnat_cli.h b/vnet/vnet/vcgn/cnat_cli.h new file mode 100644 index 00000000000..e9d190a577a --- /dev/null +++ b/vnet/vnet/vcgn/cnat_cli.h @@ -0,0 +1,206 @@ +/* *------------------------------------------------------------------ + * cnat_cli.h - CLI definitions + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_CLI_H__ +#define __CNAT_CLI_H__ + +#include "cnat_config_api.h" +#include "cnat_show_api.h" + +/* from iox cli error */ +typedef enum { + CNAT_SUCCESS = 0, + CNAT_NO_CONFIG, + CNAT_NO_VRF_RUN, + CNAT_NO_POOL_ANY, + CNAT_NO_PORT_ANY, +#ifndef NO_BULK_LOGGING + CNAT_NO_PORT_FROM_BULK, + CNAT_NO_PRE_ALLOCATED_BULK_PORTS, +#endif /* NO_BULK_LOGGING */ + CNAT_BAD_INUSE_ANY, + CNAT_NOT_FOUND_ANY, + CNAT_INV_PORT_DIRECT, + CNAT_DEL_PORT_DIRECT, + CNAT_BAD_INUSE_DIRECT, + CNAT_NOT_FOUND_DIRECT, + CNAT_OUT_LIMIT, + CNAT_MAIN_DB_LIMIT, + CNAT_USER_DB_LIMIT, + CNAT_NOT_STATIC_PORT, + CNAT_BAD_STATIC_PORT_REQ, + CNAT_NOT_THIS_CORE, + CNAT_ERR_PARSER, + CNAT_ERR_INVALID_MSG_ID, + CNAT_ERR_INVALID_MSG_SIZE, + CNAT_ERR_INVALID_PAYLOAD_SIZE, + CNAT_ERR_BAD_TCP_UDP_PORT, + CNAT_ERR_BULK_SINGLE_FAILURE, + CNAT_ERR_XLAT_ID_INVALID, + CNAT_ERR_XLAT_V6_PREFIX_INVALID, + CNAT_ERR_XLAT_V4_PREFIX_INVALID, + CNAT_ERR_XLAT_TCP_MSS_INVALID, + CNAT_ERR_6RD_ID_INVALID, + CNAT_ERR_6RD_V4_TUNNEL_SRC_INVALID, + CNAT_ERR_6RD_V6_PREFIX_INVALID, + CNAT_ERR_6RD_V6_BR_UNICAST_INVALID, + CNAT_ERR_6RD_V4_PREFIX_MASK_LEN_INVALID, + CNAT_ERR_6RD_V4_SUFFIX_MASK_LEN_INVALID, + CNAT_ERR_6RD_V4_COMBO_MASK_LEN_INVALID, + CNAT_ERR_6RD_TUNNEL_MTU_INVALID, + CNAT_ERR_6RD_TUNNEL_TTL_INVALID, + CNAT_ERR_6RD_TUNNEL_TOS_INVALID, + CNAT_ERR_NAT64_NO_VRF_RUN, + CNAT_ERR_NAT64_ID_INVALID, + CNAT_ERR_NAT64_V6_PREFIX_INVALID, + CNAT_ERR_NAT64_V4_PREFIX_INVALID, + CNAT_ERR_NAT64_TCP_MSS_INVALID, +#ifdef CGSE_DS_LITE + CNAT_ERR_DS_LITE_ID_INVALID, +#endif /* CGSE_DS_LITE */ + CNAT_ERR_NO_SESSION_DB, + CNAT_ERR_MAPE_ID_INVALID, + CNAT_ERR_MAX +} cnat_errno_t; + +#define CNAT_TRUE 1 +#define CNAT_FALSE 0 + + +#define CNAT_DEBUG_NONE (0) +#define CNAT_DEBUG_GLOBAL_ERR (1 << 0) +#define CNAT_DEBUG_DROP_TCP (1 << 0) +#define CNAT_DEBUG_DROP_UDP (1 << 1) +#define CNAT_DEBUG_DROP_ICMP (1 << 2) +#define CNAT_DEBUG_ERR_TCP (1 << 3) +#define CNAT_DEBUG_ERR_UDP (1 << 4) +#define CNAT_DEBUG_ERR_ICMP (1 << 5) +#define CNAT_DEBUG_ERR_ALG (1 << 6) +#define CNAT_DEBUG_GLOBAL_ALL (1 << 7) +#define CNAT_DEBUG_FTP_ALG (1 << 8) + + + +#define CNAT_DEBUG_ALL 0x1FF /*all of above*/ +#define CNAT_DEBUG_ERR_ALL 0x38 + +#define CNAT_DB_CLEAR_SPECIFIC (0) +#define CNAT_DB_CLEAR_ALL (1 << 0) +#define CNAT_DB_CLEAR_VRF (1 << 1) +#define CNAT_DB_CLEAR_ADDR (1 << 2) +#define CNAT_DB_CLEAR_PROTO (1 << 3) +#define CNAT_DB_CLEAR_PORT (1 << 4) + + +#define MAX_UIDX 0x3fff /*the max svi app uidb index */ +/* address mask per core */ +#define ADDR_MASK_PER_CORE PLATFORM_ADDR_MASK_PER_CORE +#define ADDR_MASK_PER_CORE_PER_PARTITION \ + PLATFORM_ADDR_MASK_PER_CORE_PER_PARTITION + +#define MAX_CORES PLATFORM_MAX_CORES +#define MAX_CORES_PER_PARTITION PLATFORM_MAX_CORES_PER_PARTITION + +/* + * Maximum pool size that is supported by platform + */ +#define CNAT_MAX_ADDR_POOL_SIZE PLATFORM_CNAT_MAX_ADDR_POOL_SIZE +#define CNAT_MAX_ADDR_POOL_SIZE_PER_CORE \ + (CNAT_MAX_ADDR_POOL_SIZE / MAX_CORES_PER_PARTITION) + +#define BOUNDARY_VALUE 256 + +#define BOUNDARY_VALUE_MASK 0xff + +#define NUM_ADDR_IN_RANGE(range, value, instance) \ + ((range / value) + ((instance % MAX_CORES_PER_PARTITION) < (range%value) ? 
1 : 0)) + +typedef enum { + CNAT_DEBUG_FLAGS_DUMP = 0, + CNAT_DEBUG_FLAG_UDP_INSIDE_CHECKSUM_DISABLE, + CNAT_DEBUG_FLAG_UDP_OUTSIDE_CHECKSUM_DISABLE, + CNAT_DEBUG_FLAG_UDP_OUTSIDE_PKT_DUMP_ENABLE, + CNAT_DEBUG_FLAG_UDP_INSIDE_PKT_DUMP_ENABLE, + CNAT_DEBUG_FLAG_ICMP_PKT_DUMP_ENABLE, + CNAT_DEBUG_FLAG_FRAG_PKT_DUMP_ENABLE, + CNAT_DEBUG_FLAG_CONFIG_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_GLOBAL_DEBUG_ALL_ENABLE, + CNAT_DEBUG_FLAG_SUMMARY_STATS_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_SHOW_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_XLAT_CONFIG_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_XLAT_DATA_PATH_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_TCP_LOGGING_ENABLE, + CNAT_DEBUG_FLAG_NFV9_LOGGING_DUMP_ENABLE, + CNAT_DEBUG_FLAG_SYSLOG_LOGGING_DUMP_ENABLE, + CNAT_DEBUG_SET_STATIC_PORT_RANGE, + CNAT_DEBUG_FLAG_V6RD_DATA_PATH_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_V6RD_CONFIG_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_V6RD_DEFRAG_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_NAT64_CONFIG_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_NAT64_DATA_PATH_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_DSLITE_DP_ENABLE, + CNAT_DEBUG_FLAG_DSLITE_CONFIG_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_CONFIG_PPTP_ENABLE = 24, + CNAT_DEBUG_FLAG_CONFIG_PCP_ENABLE = 25, + CNAT_DEBUG_FLAG_MAPE_CONFIG_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_MAPE_DATA_PATH_DEBUG_ENABLE, + CNAT_DEBUG_FLAG_MAX, +} cnat_debug_variable_value; + +/* + * Don't use too small values for PATH MTU + */ +#define MIN_NFV9_PATH_MTU 100 + +extern u32 global_debug_flag; +extern u16 debug_i_vrf; +extern u32 debug_i_flag; +extern u32 debug_i_addr_start; +extern u32 debug_i_addr_end; +extern u16 debug_o_vrf; +extern u32 debug_o_flag; +extern u32 debug_o_addr_start; +extern u32 debug_o_addr_end; +extern u32 tcp_logging_enable_flag; +extern u32 nfv9_logging_debug_flag; + +extern u32 udp_inside_checksum_disable; +extern u32 udp_outside_checksum_disable; +extern u32 udp_inside_packet_dump_enable; +extern u32 udp_outside_packet_dump_enable; + +extern u32 icmp_debug_flag; +extern u32 frag_debug_flag; + +extern u32 summary_stats_debug_flag; + +extern u32 config_debug_level; +extern u32 show_debug_level; + + +/* CLI API prototypes called from vcgn_classify.c */ +extern void cnat_nat44_add_vrf_map_t_handler(spp_api_cnat_v4_add_vrf_map_t *mp, + vlib_main_t *vm); +extern void cnat_nat44_handle_show_stats(vlib_main_t *vm); +extern void cnat_nat44_handle_show_config(vlib_main_t *vm); +extern void cnat_nat44_set_protocol_timeout_value(u16 active, + u16 init, u8 *proto, u8 reset, vlib_main_t *vm); +extern void cnat_v4_show_inside_entry_req_t_handler +(spp_api_cnat_v4_show_inside_entry_req_t *mp, vlib_main_t *vm); + +#endif /* __CNAT_CLI_H__ */ diff --git a/vnet/vnet/vcgn/cnat_cli_handler.c b/vnet/vnet/vcgn/cnat_cli_handler.c new file mode 100644 index 00000000000..d50f522a2e0 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_cli_handler.c @@ -0,0 +1,947 @@ +/* *------------------------------------------------------------------ + * cnat_cli_handler.c - CLI handler definitions + * + * Copyright (c) 2007-2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/clib.h> +#include <vppinfra/error.h> + +#include "cnat_db.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_cli.h" +#include "cnat_logging.h" +#include "cnat_syslog.h" +#include "cnat_config_api.h" +#include "cnat_show_api.h" +#include "cnat_show_response.h" + +#include <arpa/inet.h> + +u32 show_debug_level = 0; + +u32 +cnat_get_vrfmap_nfv9_logging_index (u32 i_vrf_id) +{ + cnat_nfv9_logging_info_t *my_nfv9_logging_info = 0; + u32 logging_index = EMPTY; + + /* + * Start with global logging index if available + */ + if (cnat_nfv9_global_info.cnat_nfv9_init_done) { + logging_index = cnat_nfv9_global_info.cnat_nfv9_global_collector_index; + + pool_foreach (my_nfv9_logging_info, cnat_nfv9_logging_info_pool, ({ + if (my_nfv9_logging_info->i_vrf_id == i_vrf_id) { + logging_index = my_nfv9_logging_info - + cnat_nfv9_logging_info_pool; + break; + } + })); + } + return (logging_index); +} + +u32 +cnat_get_vrfmap_syslog_logging_index (u32 i_vrf_id) +{ + cnat_syslog_logging_info_t *my_syslog_info = NULL; + u32 logging_index = EMPTY; + + /* + * Start with global logging index if available + */ + if(PREDICT_TRUE(cnat_syslog_global_info.cnat_syslog_init_done)) { + + pool_foreach (my_syslog_info, cnat_syslog_logging_info_pool, ({ + if (my_syslog_info->i_vrf_id == i_vrf_id) { + logging_index = my_syslog_info - + cnat_syslog_logging_info_pool; + break; + } + })); + } + return (logging_index); +} + +void +cnat_set_vrf_params_with_default(cnat_vrfmap_t *my_vrfmap, u32 i_vrf, u32 i_vrf_id) +{ + + my_vrfmap->status = S_WAO; + + my_vrfmap->i_vrf = i_vrf; + my_vrfmap->i_vrf_id = i_vrf_id; + + my_vrfmap->o_vrf = INVALID_UIDX; + my_vrfmap->o_vrf_id = INVALID_VRFID; + +#ifndef NO_BULK_LOGGING + BULKSIZE_FROM_VRFMAP(my_vrfmap) = BULK_ALLOC_SIZE_NONE; +#endif /* #ifndef NO_BULK_LOGGING */ + my_vrfmap->tcp_mss = V4_TCP_MSS_NOT_CONFIGURED_VALUE; + my_vrfmap->frag_tout = CNAT_IPV4_FRAG_TIMEOUT_DEF; + my_vrfmap->port_limit = V4_DEF_VRF_MAX_PORTS; + my_vrfmap->nfv9_logging_index = + cnat_get_vrfmap_nfv9_logging_index(i_vrf_id); + my_vrfmap->syslog_logging_index = + cnat_get_vrfmap_syslog_logging_index(i_vrf_id); + + /* Copy logging policy from nfv9 info. 
*/ + if(my_vrfmap->nfv9_logging_index != EMPTY) { + cnat_nfv9_logging_info_t *nfv9_logging_info = + cnat_nfv9_logging_info_pool + my_vrfmap->nfv9_logging_index; + my_vrfmap->nf_logging_policy = nfv9_logging_info->logging_policy; + } + if(my_vrfmap->syslog_logging_index != EMPTY) { + cnat_syslog_logging_info_t *syslog_logging_info = + cnat_syslog_logging_info_pool + my_vrfmap->syslog_logging_index; + my_vrfmap->syslog_logging_policy = syslog_logging_info->logging_policy; + } + #if 0 + printf("Initializing params in cnat_set_vrf_params_with_default\n" + "my_vrfmap->status = %u\n" + "my_vrfmap->tcp_mss = %u\n" + "my_vrfmap->i_vrf = %u\n" + "my_vrfmap->i_vrf_id = %u\n" + "my_vrfmap->o_vrf = %u\n" + "my_vrfmap->o_vrf_id = %u\n" + "my_vrfmap->bulk_size = %u\n" + "my_vrfmap->nfv9_logging_index = %u\n" + "my_vrfmap->syslog_logging_index = %u\n" + "my_vrfmap->frag_tout = %u\n" + "my_vrfmap->port_limit = %u\n" + "my_vrfmap->nf_logging_policy = %u\n" + "my_vrfmap->syslog_logging_policy = %u\n", + my_vrfmap->status, + my_vrfmap->tcp_mss, + my_vrfmap->i_vrf, + my_vrfmap->i_vrf_id, + my_vrfmap->o_vrf, + my_vrfmap->o_vrf_id, + my_vrfmap->bulk_size, + my_vrfmap->nfv9_logging_index, + my_vrfmap->syslog_logging_index, + my_vrfmap->frag_tout, + my_vrfmap->port_limit, + my_vrfmap->nf_logging_policy, + my_vrfmap->syslog_logging_policy); + #endif /* if 0 */ +} + +/* config command handlers */ +void cnat_nat44_add_vrf_map_t_handler(spp_api_cnat_v4_add_vrf_map_t *mp, + vlib_main_t *vm) +{ + void cnat_table_entry_fill_map(u32 start_addr, u32 end_addr, + cnat_portmap_v2_t **port_map_holder); + u32 start_addr, end_addr; + u32 pm_len __attribute__((unused)); + cnat_vrfmap_t *my_vrfmap = 0; + cnat_portmap_v2_t *pm = 0; + u16 i_vrf, o_vrf; + u32 ivrf_id, ovrf_id; + u16 my_vrfmap_index; + u8 i = 0; + + start_addr = mp->start_addr[0]; + end_addr = mp->end_addr[0]; + i_vrf = mp->i_vrf; + o_vrf = mp->o_vrf; + ovrf_id = mp->o_vrf_id; + ivrf_id = mp->i_vrf_id; + +#if DEBUG_NOT_COMMENTED + vlib_cli_output(vm, "%s: saddr[0x%x], eaddr[0x%x], i_vrf[0x%x], o_vrf[0x%x], " + "ovrf_id[0x%x], ivrf_id[0x%x]\n", __func__, start_addr, end_addr, + i_vrf, o_vrf, ovrf_id, ivrf_id); +#endif + if (start_addr > end_addr) { + vlib_cli_output(vm, "Add VRF Map failed start addr 0x%x > end addr 0x%x\n", + start_addr, end_addr); + return; + } + if ((end_addr - start_addr) > CNAT_MAX_ADDR_POOL_SIZE) { + vlib_cli_output(vm, "Add VRF Map failed start addr 0x%x - end addr " + "0x%x range > 65536\n", start_addr, end_addr); + return; + } + my_vrfmap_index = vrf_map_array[i_vrf]; + + if (my_vrfmap_index != VRF_MAP_ENTRY_EMPTY) { + + my_vrfmap = cnat_map_by_vrf + my_vrfmap_index; + + my_vrfmap->o_vrf = o_vrf; + my_vrfmap->i_vrf_id = ivrf_id; + my_vrfmap->o_vrf_id = ovrf_id; + } else { + /* + * first time add + */ + pool_get(cnat_map_by_vrf, my_vrfmap); + memset(my_vrfmap, 0, sizeof(*my_vrfmap)); + /* waiting for outside vrf */ + cnat_set_vrf_params_with_default(my_vrfmap, i_vrf, ivrf_id); + my_vrfmap->i_vrf = i_vrf; + my_vrfmap->o_vrf = o_vrf; + my_vrfmap->i_vrf_id = ivrf_id; + my_vrfmap->o_vrf_id = ovrf_id; +#ifndef NO_BULK_LOGGING + BULKSIZE_FROM_VRFMAP(my_vrfmap) = BULK_ALLOC_SIZE_NONE; +#endif /* #ifndef NO_BULK_LOGGING */ + + my_vrfmap->tcp_mss = V4_TCP_MSS_NOT_CONFIGURED_VALUE; + my_vrfmap->status = S_WA; + my_vrfmap->frag_tout = 0; /* currently setting it to 0 */ + my_vrfmap->port_limit = V4_DEF_VRF_MAX_PORTS; + vrf_map_array[i_vrf] = (my_vrfmap - cnat_map_by_vrf); + } + pm = my_vrfmap->portmap_list; + pm_len = vec_len(pm); + for(i=0; i < 1 ; i++) 
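+    /* Only the first start/end address pair in the message is used
+     * (the loop bound is 1); a zero start or end address ends the loop. */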
{ + start_addr = mp->start_addr[i]; + end_addr = mp->end_addr[i]; + if((start_addr == 0) || (end_addr == 0)) + break; + + cnat_table_entry_fill_map(start_addr, end_addr, + &(my_vrfmap->portmap_list)); + } + my_vrfmap->status = S_RUN; + vlib_cli_output(vm, "Address Pool Config Successful !!\n"); + return; +} + +void cnat_nat44_set_protocol_timeout_value(u16 active, + u16 init, u8 *proto, u8 reset, vlib_main_t *vm) +{ + if (!strncmp((char *) proto, "tcp", 3)) { + tcp_initial_setup_timeout = (reset) ? V4_DEF_TCP_IS_TO : init; + tcp_active_timeout = (reset) ? V4_DEF_TCP_AS_TO : active; + + } else if (!strncmp((char *) proto, "udp", 3)) { + udp_init_session_timeout = (reset) ? V4_DEF_UDP_IS_TO : init; + udp_act_session_timeout = (reset) ? V4_DEF_UDP_AS_TO : active; + + } else if (!strncmp((char *) proto, "icmp", 4)) { + icmp_session_timeout = (reset) ? V4_DEF_ICMP_S_TO : active; + + } else { + vlib_cli_output(vm, "Error !! Unsupported protocol %s\n", proto); + } + return; +} + + + + +/* Show command handlers */ +void cnat_nat44_handle_show_stats(vlib_main_t *vm) +{ + pool_header_t *h; + u32 used, free; + cnat_vrfmap_t *my_vrfmap =0; + cnat_portmap_v2_t *pm =0, *my_pm = 0; + u32 i, pm_len; + struct in_addr ip; + void cnat_nfv9_show_collector + (vlib_main_t *vm, cnat_nfv9_logging_info_t *my_nfv9_logging_info); + + /* active translations */ + h = pool_header(cnat_main_db); + free = vec_len(h->free_indices); + used = vec_len(cnat_main_db) - free; + + vlib_cli_output(vm, "vCGN NAT44 Statistics :\n"); + vlib_cli_output(vm, "\tActive Translations : %u\n", + NAT44_COMMON_STATS.active_translations); + vlib_cli_output(vm, "\tTotal free address : %u\n", free); + vlib_cli_output(vm, "\tTotal used address : %u\n", used); + vlib_cli_output(vm, "\ti2o drops due to port limit exceeded : %lu\n", + in2out_drops_port_limit_exceeded); + vlib_cli_output(vm, "\ti2o drops due to system limit reached : %lu\n", + in2out_drops_system_limit_reached); + vlib_cli_output(vm, "\ti2o drops due to resource depletion : %lu\n", + in2out_drops_resource_depletion); + vlib_cli_output(vm, "\ti2o drops due to no translations : %lu\n", + NAT44_COMMON_STATS.no_translation_entry_drops); + + vlib_cli_output(vm, "\tPool address usage:\n"); + vlib_cli_output(vm, "\t-------------------------------------------------\n"); + vlib_cli_output(vm, "\tExternal Address \tPorts Used\n"); + vlib_cli_output(vm, "\t-------------------------------------------------\n"); + + used = 0; + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + pm = my_vrfmap->portmap_list; + pm_len = vec_len(pm); + for (i = 0; i < pm_len; i++) { + my_pm = pm + i; + if (my_pm->inuse) { + used++; + /* maximum of 200 addresses to be returned */ + if (used <= 200) { + ip.s_addr = ntohl(my_pm->ipv4_address); + vlib_cli_output(vm, "\t%s \t\t%u\n", inet_ntoa(ip), my_pm->inuse); + } + } + } + })); + return; +} + +void cnat_nat44_handle_show_config(vlib_main_t *vm) +{ + cnat_vrfmap_t * my_vrfmap; + cnat_portmap_v2_t *pm = 0; + cnat_portmap_v2_t *my_pm = 0; + u32 pm_len; + struct in_addr ip_addr; + u8 status_str[20]; + cnat_nfv9_logging_info_t *my_nfv9_logging_info, + *global_nfv9_logging_info = 0; + void cnat_nfv9_show_collector + (vlib_main_t *vm, cnat_nfv9_logging_info_t *my_nfv9_logging_info); + + vlib_cli_output(vm, "vCGN NAT44 Config:\n"); + vlib_cli_output(vm, "\tPort Limit : %u\n", cnat_main_db_max_ports_per_user); + vlib_cli_output(vm, "\ttotal address pool : %u\n", total_address_pool_allocated); + vlib_cli_output(vm, "\tdynamic port start range : %u\n", cnat_static_port_range); + + 
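+    /* Per-VRF section: walk each configured inside-to-outside mapping and
+     * print its interface indices, map state and configured address range. */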
pool_foreach(my_vrfmap, cnat_map_by_vrf, ({ + vlib_cli_output(vm, "\ti-intf-index : 0x%x\n", my_vrfmap->i_vrf); + vlib_cli_output(vm, "\to-intf-index : 0x%x\n", my_vrfmap->o_vrf); + + memset(status_str, 0x00, sizeof(status_str)); + switch(my_vrfmap->status) { + case S_WAO: memcpy(status_str, "S_WAO", 5); break; + case S_WA: memcpy(status_str, "S_WA", 4); break; + case S_WO: memcpy(status_str, "S_WO", 4); break; + case S_RUN: memcpy(status_str, "S_RUN", 5); break; + case S_DEL: memcpy(status_str, "S_DEL", 5); break; + default: memcpy(status_str, "Invalid state", 13); + + } + vlib_cli_output(vm, + "\tvrf map table status : %s\n", status_str); + + pm = my_vrfmap->portmap_list; + pm_len = vec_len(pm); + my_pm = pm; + ip_addr.s_addr = clib_net_to_host_u32(my_pm->ipv4_address); + vlib_cli_output(vm, + "\tStart Address : %s\n", inet_ntoa(ip_addr)); + my_pm = pm + (pm_len - 1); + ip_addr.s_addr = clib_net_to_host_u32(my_pm->ipv4_address); + vlib_cli_output(vm, + "\tEnd Address : %s\n", inet_ntoa(ip_addr)); + + })); + vlib_cli_output(vm, + "\ttcp init timeout : %u sec\n", tcp_initial_setup_timeout); + vlib_cli_output(vm, + "\ttcp active timeout : %u sec\n", tcp_active_timeout); + vlib_cli_output(vm, + "\tudp init timeout : %u sec\n", udp_init_session_timeout); + vlib_cli_output(vm, + "\tudp active timeout : %u sec\n", udp_act_session_timeout); + vlib_cli_output(vm, + "\ticmp session timeout: %u sec\n", icmp_session_timeout); + +#if 0 + if (cnat_nfv9_global_info.cnat_nfv9_global_collector_index != EMPTY) { + vlib_cli_output(vm,"\nGloabal NFV9 Collector :"); + global_nfv9_logging_info = cnat_nfv9_logging_info_pool + + cnat_nfv9_global_info.cnat_nfv9_global_collector_index; + cnat_nfv9_show_collector(vm, global_nfv9_logging_info); + } +#endif + + vlib_cli_output(vm, "\nNFV9 Collector :"); + if (cnat_nfv9_logging_info_pool !=NULL) { + pool_foreach (my_nfv9_logging_info, cnat_nfv9_logging_info_pool, ({ + if (my_nfv9_logging_info != global_nfv9_logging_info) { + cnat_nfv9_show_collector(vm, my_nfv9_logging_info); + vlib_cli_output(vm, "\n"); + } + })); + } else { + vlib_cli_output(vm, "\n"); + } + + return; +} + +/* + * Check if the request flag matches the entry flags and + * if so return "1" + * + * entry_flag_ptr is an output parameter - it returns the flags + * corresponding to the translation entry + */ +static u8 cnat_v4_show_verify_display_entry ( + u16 request_flag, + cnat_main_db_entry_t *db, + u16 *entry_flag_ptr) +{ + u8 display_entry = 0; + + /* + * This should never happen + */ + if (!entry_flag_ptr) { + return (display_entry); + } + + *entry_flag_ptr = 0; + + if ((db->flags & CNAT_DB_FLAG_STATIC_PORT) + &&(db->flags & CNAT_DB_FLAG_ALG_ENTRY)) { + *entry_flag_ptr |= CNAT_TRANSLATION_ENTRY_STATIC; + *entry_flag_ptr |= CNAT_TRANSLATION_ENTRY_ALG; + } else if (db->flags & CNAT_DB_FLAG_STATIC_PORT) { + *entry_flag_ptr |= CNAT_TRANSLATION_ENTRY_STATIC; + } else if ((db->flags & CNAT_DB_FLAG_ALG_ENTRY) || + (db->flags & CNAT_DB_FLAG_PPTP_GRE_ENTRY)) { + *entry_flag_ptr |= CNAT_TRANSLATION_ENTRY_ALG; + } else if (db->flags & CNAT_DB_FLAG_PCPI) { + *entry_flag_ptr |= CNAT_TRANSLATION_ENTRY_PCPI_DYNAMIC; + } else if (db->flags & CNAT_DB_FLAG_PCPE) { + *entry_flag_ptr |= CNAT_TRANSLATION_ENTRY_PCPE_DYNAMIC; + } else { + *entry_flag_ptr |= CNAT_TRANSLATION_ENTRY_DYNAMIC; + } + + if (request_flag == CNAT_TRANSLATION_ENTRY_ALL) { + display_entry = 1; + } else { + /* + * Check if the request_flag is STATIC or ALG + * and the entry is STATIC or ALG as well + */ + if ((request_flag & 
CNAT_TRANSLATION_ENTRY_STATIC) && + (*entry_flag_ptr & CNAT_TRANSLATION_ENTRY_STATIC)) { + display_entry = 1; + } + + if ((request_flag & CNAT_TRANSLATION_ENTRY_ALG) && + (*entry_flag_ptr & CNAT_TRANSLATION_ENTRY_ALG)) { + display_entry = 1; + } + + if ((request_flag & CNAT_TRANSLATION_ENTRY_PCPI_DYNAMIC) && + (*entry_flag_ptr & CNAT_TRANSLATION_ENTRY_PCPI_DYNAMIC)) { + display_entry = 1; + } + + if ((request_flag & CNAT_TRANSLATION_ENTRY_PCPE_DYNAMIC) && + (*entry_flag_ptr & CNAT_TRANSLATION_ENTRY_PCPE_DYNAMIC)) { + display_entry = 1; + } + + /* + * For dynamic entry case, check if flags field is 0 + */ + if ((request_flag & CNAT_TRANSLATION_ENTRY_DYNAMIC) && + (*entry_flag_ptr & CNAT_TRANSLATION_ENTRY_DYNAMIC)) { + display_entry = 1; + } + } + + if (PREDICT_FALSE(show_debug_level > 2)) { + PLATFORM_DEBUG_PRINT("Entry (0x%x, %d) -> (0x%x, %d) request_flag 0x%x, entry_flag 0x%x, display_entry %d\n", db->in2out_key.k.ipv4, db->in2out_key.k.port, db->out2in_key.k.ipv4, db->out2in_key.k.port, request_flag, *entry_flag_ptr, display_entry); + } + + return (display_entry); +} +void cnat_v4_show_inside_entry_req_t_handler +(spp_api_cnat_v4_show_inside_entry_req_t *mp, vlib_main_t * vm) +{ + cnat_user_db_entry_t *udb = NULL; + cnat_main_db_entry_t *db = NULL; + cnat_db_key_bucket_t u_ki, ki; + u64 a, b, c; + u32 index; + u16 start_port, end_port, port; + u16 request_flag = 0; + u16 entry_flag = 0; + u8 num_entries = 0; + u8 proto, all; + u8 done = 0; + cnat_v4_show_translation_entry *entry_list; + cnat_v4_show_translation_entry entry[PLATFORM_MAX_TRANSLATION_ENTRIES]; + u8 display_entry; + u8 flag_str[11]; + + ki.k.k.ipv4 = mp->ipv4_addr; + ki.k.k.vrf = mp->vrf_id; + start_port = mp->start_port; + end_port = mp->end_port; + //memset(flag_str,0x00,11); + //strncpy(flag_str,"NA",2); +#if DEBUG + vlib_cli_output(vm, "## proto %d, inside-addr 0x%x, start_port %u, " + "end_port %u, vrf 0x%x, flag 0x%x\n", + mp->protocol, + mp->ipv4_addr, + mp->start_port, + mp->end_port, + mp->vrf_id, + mp->flags); +#endif + + proto = mp->protocol; + ki.k.k.vrf |= ((u16)proto << CNAT_PRO_SHIFT); + + all = mp->all_entries; /* for no port range case */ + request_flag = mp->flags; /* for all, alg, static entries case */ + entry_list = entry; + + /* + * check if the address is belonging to this core + */ + + + /* + * first we check if the user exists in the udb, if he is not then + * it does not make sense to check the main db for translations + */ + u_ki.k.k.vrf = ki.k.k.vrf & CNAT_VRF_MASK; + u_ki.k.k.ipv4 = ki.k.k.ipv4; + u_ki.k.k.port = 0; + + if (PREDICT_FALSE(show_debug_level > 0)) { + vlib_cli_output(vm, "\nI_TRANS_CORE %d: IPv4 0x%x, VRF 0x%x, " + "start_port %d, end_port %d", + my_instance_number, ki.k.k.ipv4, + ki.k.k.vrf, start_port, end_port); + } + + udb = cnat_user_db_lookup_entry(&u_ki); + if (!udb) { + if (PREDICT_FALSE(show_debug_level > 0)) { + vlib_cli_output(vm, "\nReturning %d entries", + num_entries); + } + return; + } + + if (all) { + #if 0 + if (PREDICT_FALSE(show_debug_level > 0)) { + PLATFORM_DEBUG_PRINT("\nI_TRANS: Printing ALL\n"); + } + + /* + * get the head of list of translation entries for that user + * from the user db + */ + head = udb->translation_list_head_index; + db = cnat_main_db + head; + + while (num_entries < PLATFORM_MAX_TRANSLATION_ENTRIES) { + + if (((db->in2out_key.k.vrf & CNAT_PRO_MASK) >> CNAT_PRO_SHIFT) + != proto) { + goto next_entry; + } + + display_entry = + spp_api_cnat_v4_show_verify_display_entry(request_flag, db, + &entry_flag); + + if (display_entry) { + 
entry_list->ipv4_addr = + spp_host_to_net_byte_order_32(db->out2in_key.k.ipv4); + entry_list->cnat_port = + spp_host_to_net_byte_order_16(db->out2in_key.k.port); + entry_list->src_port = + spp_host_to_net_byte_order_16(db->in2out_key.k.port); + + entry_list->protocol = proto; + + /* incase of gre - in2out is not accounted */ + if(proto != CNAT_PPTP) { + + entry_list->in2out_packets = + spp_host_to_net_byte_order_32(db->in2out_pkts); + } else { + entry_list->in2out_packets = 0; + } + entry_list->out2in_packets = + spp_host_to_net_byte_order_32(db->out2in_pkts); + + entry_list->flags = + spp_host_to_net_byte_order_16(entry_flag); + + num_entries++; + entry_list = entry_list + 1; + } +next_entry: + db = cnat_main_db + db->user_ports.next; + /* + * its a circular list, so if we have reached the head again + * all the entries for that user have been read + */ + if (db == (cnat_main_db + head)) { + break; + } + } + resp->num_entries = num_entries; + #endif /* if 0 */ + } else { + if (PREDICT_FALSE(show_debug_level > 0)) { + vlib_cli_output(vm, "\nI_TRANS: Printing range %d .. %d\n", + start_port, end_port); + } + /* + * port range is specified so for each port calculate the hash and + * check if the entry is present in main db + */ + port = start_port; + done = 0; + while ((!done) && (num_entries < PLATFORM_MAX_TRANSLATION_ENTRIES)) { + + ki.k.k.port = port; + if (port >= end_port) { + done = 1; + } else { + port++; + } + CNAT_V4_GET_HASH(ki.k.key64, + ki.bucket, + CNAT_MAIN_HASH_MASK); + index = cnat_in2out_hash[ki.bucket].next; + if (PREDICT_TRUE(index == EMPTY)) { + continue; + } + + do { + db = cnat_main_db + index; + if (db->in2out_key.key64 == ki.k.key64) { + break; + } + index = db->in2out_hash.next; + } while (index != EMPTY); + + if (index == EMPTY) { + continue; + } else { + + display_entry = + cnat_v4_show_verify_display_entry(request_flag, db, + &entry_flag); + if (display_entry) { + + entry_list->ipv4_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + entry_list->cnat_port = + clib_host_to_net_u16(db->out2in_key.k.port); + entry_list->src_port = + clib_host_to_net_u16(db->in2out_key.k.port); + + entry_list->protocol = proto; + entry_list->nsessions = db->nsessions; + entry_list->flags = ((db->flags & CNAT_DB_FLAG_TCP_ACTIVE) || + (db->flags & CNAT_DB_FLAG_UDP_ACTIVE)) ? 1:0; + /* incase of gre - in2out is not accounted */ + if(proto != CNAT_PPTP) { + entry_list->in2out_packets = + clib_host_to_net_u32(db->in2out_pkts); + } else { + entry_list->in2out_packets = 0; + } + + entry_list->out2in_packets = + clib_host_to_net_u32(db->out2in_pkts); + + if (PREDICT_FALSE(show_debug_level > 3)) { + vlib_cli_output(vm, "\n1. 
Entry: Addr 0x%x, port %d, num_entries %d", + clib_net_to_host_u32(entry_list->ipv4_addr), + clib_net_to_host_u16(entry_list->cnat_port), + num_entries); + } + + entry_list = entry_list + 1; + num_entries++; + } + } /* if (index == EMPTY) */ + } /* while() */ + } + + if (PREDICT_FALSE(show_debug_level > 0)) { + if (num_entries) { + vlib_cli_output(vm, "\nReturning %d entries\n", + num_entries); + } + } + + entry_list = entry; + u8 i = 0; + struct in_addr ip; + u8 proto_str[10]; + u8 transl_str[10]; + memset(proto_str, 0x00, 10); + memset(transl_str, 0x00, 10); + + if (proto == 1) strncpy((char *)proto_str, "udp", 3); + else if (proto == 2) strncpy((char *)proto_str, "tcp", 3); + else if (proto == 3) strncpy((char *)proto_str, "icmp", 4); + else strncpy((char *)proto_str, "unknown", 7); + + if (request_flag == 0x04) strncpy((char *)transl_str, "Dynamic", 7); + else strncpy((char *)transl_str, "Unknown", 7); /* currently we are not supporting static/alg entries */ + + ip.s_addr = clib_net_to_host_u32(u_ki.k.k.ipv4); + + vlib_cli_output (vm, "Inside-translation details\n"); + vlib_cli_output (vm, "--------------------------\n"); + + vlib_cli_output (vm, "Inside interface index : 0x%x\n", u_ki.k.k.vrf); + vlib_cli_output (vm, "Inside address : %s\n", inet_ntoa(ip)); + vlib_cli_output (vm, "Start port : %u\n", start_port); + vlib_cli_output (vm, "End port : %u\n", end_port); + + vlib_cli_output (vm, "--------------------------------------------------------------------------------------" + "-----------------------\n"); + vlib_cli_output (vm, "Outside Protocol Inside Outside Translation" + " I2O O2I Flag Num\n"); + vlib_cli_output (vm, "Address Src Port Src Port Type " + " Pkts Pkts Sessions\n"); + vlib_cli_output (vm, "--------------------------------------------------------------------------------------" + "-----------------------\n"); + + while ((num_entries) && (entry_list) && (i < 50)) { + + ip.s_addr = entry_list->ipv4_addr; + memset(flag_str,0x00,11); + if((proto == 1) || (proto == 2)) { + if(entry_list->flags == 1) { + strncpy((char *)flag_str,"Active",6); + } + else { + strncpy((char *) flag_str,"Non Active",10); + } + } else { + strncpy((char *) flag_str, "NA", 2); + } + vlib_cli_output(vm, "%s %10s %11u %12u %13s %10u %10u %14s %6u\n", + inet_ntoa(ip), proto_str, + clib_net_to_host_u16(entry_list->src_port), + clib_net_to_host_u16(entry_list->cnat_port), + transl_str, + clib_net_to_host_u32(entry_list->in2out_packets), + clib_net_to_host_u32(entry_list->out2in_packets), + flag_str, + entry_list->nsessions); + entry_list++; + num_entries--; i++; + } + + return; +} + +void cnat_v4_show_outside_entry_req_t_handler +(spp_api_cnat_v4_show_outside_entry_req_t *mp, vlib_main_t *vm) +{ + cnat_main_db_entry_t *db = NULL; + cnat_db_key_bucket_t ko; + u64 a, b, c; + u32 index; + u16 start_port, end_port, port; + u16 request_flag = 0; + u16 entry_flag = 0; + u8 num_entries = 0; + u8 proto; + cnat_v4_show_translation_entry *entry_list; + cnat_v4_show_translation_entry entry[PLATFORM_MAX_TRANSLATION_ENTRIES]; + u8 done = 0; + u8 display_entry; + u8 flag_str[11]; + + ko.k.k.ipv4 = mp->ipv4_addr; + ko.k.k.vrf = mp->vrf_id; + start_port = mp->start_port; + end_port = mp->end_port; + + proto = mp->protocol; + request_flag = mp->flags; + + ko.k.k.vrf |= ((u16)proto << CNAT_PRO_SHIFT); + + entry_list = entry; + + if (PREDICT_FALSE(show_debug_level > 0)) { + vlib_cli_output(vm, "\nO_TRANS_CORE %d: IPv4 0x%x, VRF 0x%x, " + "start_port %d, end_port %d", my_instance_number, + ko.k.k.ipv4, ko.k.k.vrf, 
start_port, end_port); + } + + /* + * for each ip and port combination we need to scan the main db + * and check if the entry is present in main db + */ + port = start_port; + done = 0; + while ((!done) && (num_entries < PLATFORM_MAX_TRANSLATION_ENTRIES)) { + ko.k.k.port = port; + + /* + * If we have reached the end_port, we are DONE + */ + if (port >= end_port) { + done = 1; + } else { + port++; + } + + CNAT_V4_GET_HASH(ko.k.key64, + ko.bucket, + CNAT_MAIN_HASH_MASK); + + index = cnat_out2in_hash[ko.bucket].next; + if (PREDICT_TRUE(index == EMPTY)) { + continue; + } + + do { + db = cnat_main_db + index; + if (db->out2in_key.key64 == ko.k.key64) { + break; + } + index = db->out2in_hash.next; + } while (index != EMPTY); + + if (index == EMPTY) { + continue; + } else { + display_entry = + cnat_v4_show_verify_display_entry(request_flag, db, + &entry_flag); + + if (display_entry) { + entry_list->ipv4_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + entry_list->cnat_port = + clib_host_to_net_u16(db->out2in_key.k.port); + entry_list->src_port = + clib_host_to_net_u16(db->in2out_key.k.port); + entry_list->protocol = proto; + entry_list->nsessions = db->nsessions; + entry_list->flags = ((db->flags & CNAT_DB_FLAG_TCP_ACTIVE) || + (db->flags & CNAT_DB_FLAG_UDP_ACTIVE)) ? 1:0; + /* incase of gre - in2out is not accounted */ + if(proto != CNAT_PPTP) { + entry_list->in2out_packets = + clib_host_to_net_u32(db->in2out_pkts); + } else { + entry_list->in2out_packets = 0 ; + } + entry_list->out2in_packets = + clib_host_to_net_u32(db->out2in_pkts); + #if 0 + entry_list->flags = + clib_host_to_net_u16(entry_flag); + #endif + entry_list = entry_list + 1; + num_entries++; + } + } + } + + if (num_entries == 0) { + /* No point proceeding further */ + return; + } + + if (PREDICT_FALSE(show_debug_level > 0)) { + if (num_entries) { + vlib_cli_output(vm, "\nO_TRANS: Core %d returning %d entries", + num_entries); + } + } + + entry_list = entry; + u8 i = 0; + struct in_addr ip; + u8 proto_str[10]; + u8 transl_str[10]; + memset(proto_str, 0x00, 10); + memset(transl_str, 0x00, 10); + + if (proto == 1) strncpy((char *) proto_str, "udp", 3); + else if (proto == 2) strncpy((char *) proto_str, "tcp", 3); + else if (proto == 3) strncpy((char *) proto_str, "icmp", 4); + else strncpy((char *) proto_str, "unknown", 7); + + if (request_flag == 0x04) strncpy((char *) transl_str, "Dynamic", 7); + else strncpy((char *)transl_str, "Unknown", 7); /* currently we are not supporting static/alg entries */ + + ip.s_addr = clib_net_to_host_u32(ko.k.k.ipv4); + + vlib_cli_output (vm, "Outside-translation details\n"); + vlib_cli_output (vm, "--------------------------\n"); + + vlib_cli_output (vm, "Outside interface index : 0x%x\n", (ko.k.k.vrf & CNAT_VRF_MASK)); + vlib_cli_output (vm, "Outside address : %s\n", inet_ntoa(ip)); + vlib_cli_output (vm, "Start port : %u\n", start_port); + vlib_cli_output (vm, "End port : %u\n", end_port); + + vlib_cli_output (vm, "--------------------------------------------------------------------------------------" + "-----------------------\n"); + vlib_cli_output (vm, "Inside Protocol Outside Inside Translation" + " I2O O2I Flag Num\n"); + vlib_cli_output (vm, "Address Dst Port Dst Port Type " + " Pkts Pkts Sessions\n"); + vlib_cli_output (vm, "--------------------------------------------------------------------------------------" + "-----------------------\n"); + + while ((num_entries) && (entry_list) && (i < 50)) { + ip.s_addr = entry_list->ipv4_addr; + memset(flag_str,0x00,11); + if((proto == 1) || 
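+               /* proto here uses the request encoding seen above:
+                  1 = UDP, 2 = TCP, 3 = ICMP */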
(proto == 2)) { + if(entry_list->flags == 1) { + strncpy((char *) flag_str,"Active",6); + } + else { + strncpy((char *) flag_str,"Non Active",10); + } + } else { + strncpy((char *) flag_str, "NA", 2); + } + vlib_cli_output(vm, "%s %10s %11u %12u %13s %10u %10u %14s %6u\n", + inet_ntoa(ip), proto_str, + clib_net_to_host_u16(entry_list->cnat_port), + clib_net_to_host_u16(entry_list->src_port), + transl_str, + clib_net_to_host_u32(entry_list->in2out_packets), + clib_net_to_host_u32(entry_list->out2in_packets), + flag_str, + entry_list->nsessions); + entry_list++; + num_entries--; i++; + + } + return; +} diff --git a/vnet/vnet/vcgn/cnat_common_api.h b/vnet/vnet/vcgn/cnat_common_api.h new file mode 100644 index 00000000000..a4eb74432f2 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_common_api.h @@ -0,0 +1,22 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +#ifndef __CNAT_COMMON_API_H__ +#define __CNAT_COMMON_API_H__ + +/* All common API prototypes */ +void cnat_scanner_db_process_turn_on(vlib_main_t *vm); + +#endif diff --git a/vnet/vnet/vcgn/cnat_config.c b/vnet/vnet/vcgn/cnat_config.c new file mode 100644 index 00000000000..87183dfa961 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_config.c @@ -0,0 +1,77 @@ +/* + *------------------------------------------------------------------ + * cnat_config.c - configuration definitions + * + * Copyright (c) 2007-2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ +#include "cnat_config.h" +#include "cnat_cli.h" +#include "cnat_v4_pptp_alg.h" +#include "platform_common.h" + +/* session timeout */ + +u16 tcp_initial_setup_timeout = V4_DEF_TCP_IS_TO; /* sec */ +u16 tcp_active_timeout = V4_DEF_TCP_AS_TO; /* sec */ +u16 udp_init_session_timeout = V4_DEF_UDP_IS_TO; /* 30 sec */ +u16 udp_act_session_timeout = V4_DEF_UDP_AS_TO; /* 2 min */ +u16 icmp_session_timeout = V4_DEF_ICMP_S_TO; /* 60 sec */ + +cnat_pptp_config_t pptp_cfg = + { + .enable = PPTP_DISABLED, + .timeout = PPTP_GRE_TIMEOUT + } ; + +/* This flag is used as indication of timeout related config + * changes and hence db needs to be updated + */ +u8 timeout_dirty_flag = 0; + +/* mapping refresh direction, + * 1 inbound and outbound refresh + */ +u8 mapping_refresh_both_direction = V4_DEF_ENABLE; + +u16 cnat_main_db_max_ports_per_user = V4_DEF_MAX_PORTS; + +u32 cnat_main_db_icmp_rate_limit = DEF_RATE_LIMIT; +u32 cnat_main_db_icmp_rate_limit_core = DEF_RATE_LIMIT_CORE; +u32 crc_zero_udp_rate_limit_core = RATE_LIMIT_UDP_CORE; +u16 cnat_static_port_range = CNAT_DEF_STATIC_PORT_RANGE; + + +/* + * ftp alg enable + */ +u8 ftp_alg_enabled = V4_DEF_DISABLE; +u16 rtsp_alg_port_num = 0; + +/* + * load balancing debug mode + */ +u8 lb_debug_enable = V4_DEF_DISABLE; + + +/* good or evil mode + * 0 endpoint-independnet filter, good mode + * 1 address depedent filter, evil mode + */ +u8 address_dependent_filtering = V4_DEF_DISABLE; + +u16 per_user_icmp_msg_limit = ICMP_MSG_RATE_LIMIT; + +u16 config_delete_timeout = V4_CONFIG_DELETE_TO; + diff --git a/vnet/vnet/vcgn/cnat_config.h b/vnet/vnet/vcgn/cnat_config.h new file mode 100644 index 00000000000..f104273716f --- /dev/null +++ b/vnet/vnet/vcgn/cnat_config.h @@ -0,0 +1,582 @@ +/* + *------------------------------------------------------------------ + * cnat_config.h - configuration database definitions + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_CONFIG_H__ +#define __CNAT_CONFIG_H__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#include "cnat_bulk_port_defs.h" + +/* default policy value */ +#define V4_DEF_ICMP_S_TO 60 /*icmp session timeout */ +#define V4_DEF_UDP_IS_TO 30 /*udp init session timeout */ +#define V4_DEF_UDP_AS_TO 120 /*udp active session timeout */ +#define V4_DEF_TCP_IS_TO 120 /*tcp init session timeout */ +#define V4_DEF_TCP_AS_TO 1800 /*tcp active session timeout, 30 min */ +#define V4_DEF_TCP_MSS 1460 /*tcp mss */ +#define V4_DEF_MAX_PORTS 100 /*max port limit per user */ +#define DEF_RATE_LIMIT PLATFORM_MAX_CORES /* No of packets/sec icmp generated */ +#define DEF_RATE_LIMIT_CORE 1 /* No of packets/sec icmp generated (per core) */ +#define RATE_LIMIT_UDP_CORE 1000 /* Max allowed udp crc zero packets/sec/core */ + +#define NAT44_RESERVED_INST_ID 1 +#define DSLITE_START_ID (NAT44_RESERVED_INST_ID + 1) +#define V4_DEF_VRF_MAX_PORTS 0 /*max port limit per vrf user; + 0 means use the global port limit for user*/ +/*Hardcoded . TBD - can be made configurable */ + +#define V4_DEF_ENABLE 1 /* feature enable */ +#define V4_DEF_DISABLE 0 /* feature disable */ + +#define CNAT_DEF_STATIC_PORT_RANGE 1024 /* Default range for static ports */ +/* + * If TCP MSS is not configured, store the maximum possible value + */ +#define V4_TCP_MSS_NOT_CONFIGURED_VALUE 0xffff + +/* default timeout for fragments in seconds set to 2 + * in case its not configured + */ +#define CNAT_IPV4_FRAG_TIMEOUT_DEF 2 +/* other */ +/* max db entries to be scaned */ +#define MAX_DB_ENTRY_PER_SCAN PLATFORM_MAX_DB_ENTRY_PER_SCAN +/* max db entries selected per scan */ +#define MAX_DB_ENTRY_SELECTED_PER_SCAN PLATFORM_MAX_DB_ENTRY_SELECTED_PER_SCAN + +#define ICMP_MSG_RATE_LIMIT 3 /* rate limit for icmp message */ +#define V4_CONFIG_DELETE_TO 600 /* timeout for entry to be deleted */ + +/* session timeout */ + +extern u16 tcp_initial_setup_timeout; +extern u16 tcp_active_timeout; +extern u16 udp_init_session_timeout; +extern u16 udp_act_session_timeout; +extern u16 icmp_session_timeout; + +extern u8 timeout_dirty_flag; + +/* mapping refresh direction, + * 0 outbound only refresh, + * 1 inbound and outbound refresh + */ +extern u8 mapping_refresh_both_direction; + + +extern u16 cnat_main_db_max_ports_per_user; +extern u32 cnat_main_db_icmp_rate_limit; +extern u32 cnat_main_db_icmp_rate_limit_core; +extern u32 crc_zero_udp_rate_limit_core; + +extern u16 cnat_static_port_range; + +typedef enum { + LOG_FORMAT_UNDEFINED =0, + LOG_FORMAT_COMPACT, + LOG_FORMAT_NF9, + LOG_FORMAT_MAX, /* keep this as last */ +} log_format_t; + +typedef enum { + CNAT_CONFIG_DEL_OP = 0, + CNAT_CONFIG_ADD_OP, +} cnat_op_flag_t; + +extern u8 ftp_alg_enabled; +extern u16 rtsp_alg_port_num; + +/* + * load balancing debug mode + */ +extern u8 lb_debug_enable; + +/* good or evil mode + * 0 endpoint-independnet filter, good mode + * 1 address depedent filter, evil mode + */ +extern u8 address_dependent_filtering; + +extern u16 per_user_icmp_msg_limit; + +/* vrfmap or portmap holding time + * after delete + */ +extern u16 config_delete_timeout; + +/* + * Bit map for various configuration in the POLICY KNOB case + */ +#define BIDIR_REFRESH_ENABLE 0x01 +#define BIDIR_REFRESH_DISABLE 0x02 +#define FTP_ALG_ENABLE 0x04 +#define FTP_ALG_DISABLE 0x08 +#define DEFAULT_NFV9_LOGGING_SERVER_ENABLE 0x10 +#define DEFAULT_NFV9_LOGGING_SERVER_DISABLE 0x20 + + +/* + * This structure contains a single VRF map 
configuration + * from a bulk message. This structure is in conformanace + * with the following structures defined in cnat_config_api.h + * - spp_api_cnat_v4_bulk_vrf_map_t + * + * Any change in the above structures should be propagated here + */ +typedef struct _spp_api_cnat_v4_single_vrf_map_req { + u32 i_vrf_id; + u32 o_vrf_id; + + u16 i_vrf; + u16 o_vrf; + + u32 start_addr; + u32 end_addr; + + u16 vrf_policy_enable; +#define TCP_MSS_ENABLE 0x0001 +#define TCP_MSS_DISABLE 0x0002 +#define NFV9_LOGGING_ENABLE 0x0004 +#define NFV9_LOGGING_DISABLE 0x0008 +#define VRF_MAP_DELETE 0x0010 +#define VRF_MAP_ADD 0x0020 +#define BULK_ALLOC_CHANGE 0x0040 + + u16 tcp_mss_value; + u32 vrf_nfv9_logging_ipv4_address; + u16 vrf_nfv9_logging_udp_port; + u16 vrf_nfv9_refresh_rate; + u16 vrf_nfv9_timeout_rate; + u16 vrf_nfv9_path_mtu; +#ifndef NO_BULK_LOGGING + bulk_alloc_size_t bulk_size; +#endif /* NO_BULK_LOGGING */ +} spp_api_cnat_v4_single_vrf_map_req; + +typedef struct _spp_api_cnat_v4_single_vrf_map_rc { + u8 vrf_map_rc; + u8 tcp_mss_rc; + u8 nfv9_logging_rc; + u8 pad; +} spp_api_cnat_v4_single_vrf_map_rc; + +/* + * Bulk Response for the VRF map request + */ +typedef struct _spp_api_cnat_v4_bulk_vrf_map_resp { + u16 _spp_msg_id; + u8 bulk_rc; + u8 pad; + + u32 num_vrfmap_entries; + + spp_api_cnat_v4_single_vrf_map_rc vrf_map_rc; + +} spp_api_cnat_v4_bulk_vrf_map_resp; + +/* + * Bulk Response for the Policy Knob request + */ +typedef struct _spp_api_cnat_v4_bulk_policy_knob_resp { + u16 _spp_msg_id; + u8 bulk_rc; /* Global rc code */ + u8 pad; + + u8 port_limit_rc; + u8 icmp_timeout_rc; + u8 udp_init_timeout_rc; + u8 udp_act_timeout_rc; + + u8 tcp_init_timeout_rc; + u8 tcp_act_timeout_rc; + u8 nfv9_logging_rc; + u8 pad2; +} spp_api_cnat_v4_bulk_policy_knob_resp; + + +/* PPTP ALG defs and structures */ + +/* dont change the order.. 
+ maintened at offset mapped to msg ids */ + +typedef struct pptp_ctrl_msg_ctrs_t { + u64 dummy; + u64 sccr; + u64 sccrp; + u64 stccrq; + u64 stccrp; + u64 erq; + u64 erp; + u64 ocrq; + u64 ocrp; + u64 icrq; + u64 icrp; + u64 iccn; + u64 cclr; + u64 cdn; + u64 wen; + u64 sli; +}pptp_ctrl_msg_ctrs_t; + +#define PPTP_INCR(ctr) pptp_cfg.counters.pptp_##ctr++ +#define PPTP_DECR(ctr) pptp_cfg.counters.pptp_##ctr-- + +typedef struct pptp_counters_t { + + u64 pptp_ctrl_msg_drops; + u64 pptp_active_tunnels; + u64 pptp_active_channels; + u64 pptp_in2out_gre_drops; + u64 pptp_out2in_gre_drops; + u64 pptp_in2out_gre_fwds; + u64 pptp_out2in_gre_fwds; + pptp_ctrl_msg_ctrs_t ctrl_ctrs; + +} pptp_counters_t; + +#define CNAT_PPTP_ENABLE 1 +#define CNAT_PPTP_DEF_TIMEOUT 60 /* secs */ + +typedef struct cnat_pptp_config_t { + u8 enable; + u16 timeout; + pptp_counters_t counters; + +} cnat_pptp_config_t; + + +#define CNAT_PPTP_ENABLE_FLAG 0x01 +#define CNAT_PPTP_TIMEOUT_FLAG 0x02 + +/* pptp config msg resp */ +typedef struct _spp_api_cnat_v4_config_pptp_alg_resp { + u16 _spp_msg_id; + u8 bulk_rc; + u8 pad; + +} spp_api_cnat_v4_config_pptp_alg_resp_t; + +typedef struct { + u16 msg_id; + u8 rc; + u8 pad[5]; + + /* better to have a group structures rather than individual + variables, any change in counters is will automatically + reflect here */ + pptp_counters_t counters; +} pptp_show_counters_resp_t ; + + +extern cnat_pptp_config_t pptp_cfg; + + +/* ========= 6RD declarations =============================== */ + +#define V6RD_ENTRY_DELETE 0x00 +#define IPV4_TUNNEL_SRC_CONFIG 0x04 +#define TUNNEL_MTU_CONFIG 0x08 +#define IPV4_PREFIXMASK_LEN_CONFIG 0x10 +#define IPV4_SUFFIXMASK_LEN_CONFIG 0x20 +#define TTL_CONFIG 0x40 +#define TOS_CONFIG 0x80 +#define V6RD_IPV6_PREFIX_CONFIG 0x100 +#define V6RD_RESET_DF_BIT_CONFIG 0x200 +#define V6RD_UNICAST_ADDR_CONFIG 0x400 +#define V6RD_REASSEMB_CONFIG 0x800 + +#define TTL_ENABLE 0x1 +#define TOS_ENABLE 0x2 +#define RESET_DF_BIT 0x4 +#define REASSEMBLY_ENABLE 0x8 + +/* ========= 6RD declarations =============================== */ + +/* + * Single Request for XLAT config + */ +typedef struct _spp_api_cnat_v4_single_xlat_config_req { + + /* + * Indicates the xlat instance id - How big will this value be + * Can we restrict it between 0..255, that way the APP code + * can use an array to store the xlat instances. 
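+ * (Illustration only, not part of this API: with the id bounded to 0..255
+ * the application could simply index a fixed table, e.g.
+ *     my_xlat_instance_t *inst = &my_xlat_instances[xlat_id];
+ * where my_xlat_instances is a hypothetical 256-entry array.)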
+ */ + u32 xlat_id; + +#define XLAT_ENTRY_DELETE 0x0000 +#define IPV6_SVI_IF_NUM_CONFIG 0x0001 +#define IPV4_SVI_IF_NUM_CONFIG 0x0002 +#define IPV4_TO_IPV6_TCP_MSS_CONFIG 0x0004 +#define IPV6_TO_IPV4_TCP_MSS_CONFIG 0x0008 +#define IPV6_PREFIX_CONFIG 0x0010 +#define IPV6_UBIT_ON_CONFIG 0x0020 +#define IPV6_NON_TRANSLATABLE_PREFIX_MAP_CONFIG 0x0040 +#define IPV4_TOS_SETTING_CONFIG 0x0080 +#define IPV6_TOS_SETTING_CONFIG 0x0100 +#define IPV4_DFBIT_CLEAR_CONFIG 0x0200 +#define ICMPV6_PTB_MTU_SET_CONFIG 0x0400 +#define IPV6_NON_TRANSLATABLE_PREFIX_MAP_ALG_CONFIG 0x0800 +#define CPE_V4_PREFIX_CONFIG 0x1000 /* for map-t */ +#define CPE_V6_PREFIX_CONFIG 0x2000 /* for map-t */ +#define EXTERNAL_V6_PREFIX_CONFIG 0x4000 /* for map-t */ +#define PORT_SHARING_RATIO_CONFIG 0x8000 /* for map-t */ +#define CONSECUTIVE_PORTS_CONFIG 0x10000 /* for map-t */ + + u32 xlat_config_fields_enable; + + /* + * If numbers of the IPv6 and IPv4 SVI interfaces + */ + u32 ipv6_svi_if_num; + u32 ipv4_svi_if_num; + + /* + * TCP MSS values for the 2 XLAT directions + */ + u16 v4_to_v6_tcp_mss; + u16 v6_to_v4_tcp_mss; + + /* + * XLAT IPv6 prefix + */ + u32 v6_prefix[4]; + + /* + * XLAT IPv6 prefix mask + */ + u8 v6_prefix_mask_len; + + /* + * Set to non-zero if UBITs are reserved + */ +#define UBITS_ON 0x01 +#define IPV4_DF_BIT_CLEAR 0x02 +#define ICMPV6_MTU_SET 0x04 +#define IPV4_TOS_SET_ENABLED 0x08 +#define IPV6_TC_SET_ENABLED 0x10 + + u8 feature_enable_bits; + + u8 v4_prefix_mask_len; + +#define IPV6_NON_TRANSLATABLE_PREFIX_MAP_ALG_HASH 0x1 +#define IPV6_NON_TRANSLATABLE_PREFIX_MAP_ALG_TTL 0x2 +#define IPV6_NON_TRANSLATABLE_PREFIX_MAP_ALG_RANDOM 0x3 + u8 non_translatable_v6_prefix_v4_map_prefix_alg; + + u8 ipv6_tos_value; + + u8 ipv4_tos_value; + + u8 pad2; + + u8 pad3; + + u32 v4_prefix; + + /* + * MAP-T/MAP-E specific parameters + */ + u8 xlat_type; + + u32 cpe_domain_v6_prefix[4]; + u8 cpe_domain_v6_prefix_len; + + u32 cpe_domain_v4_prefix; + u8 cpe_domain_v4_prefix_len; + + u32 external_domain_v6_prefix[4]; + u8 external_domain_v6_prefix_len; + + u8 port_sharing_ratio_bits; + u8 consecutive_ports_bits; + +} spp_api_cnat_v4_single_xlat_config_req; + +/* + * Single Response for the xlat config request + */ +typedef struct _spp_api_cnat_v4_single_xlat_config_resp { + u8 v4_if_num_rc; + u8 v6_if_num_rc; + u8 v4_to_v6_tcp_mss_rc; + u8 v6_to_v4_tcp_mss_rc; + + u8 v6_prefix_rc; + u8 ubit_on_rc; + u8 v4_prefix_rc; + u8 xlat_id_rc; + + u8 non_translatable_v6_prefix_v4_map_alg_rc; + u8 ipv4_dfbit_clear_rc; + u8 icmpv6_ptb_mtu_set_rc; + u8 ipv4_tos_set_rc; + + u8 ipv6_tos_set_rc; + u8 pad1; + u8 pad2; + u8 pad3; +} spp_api_cnat_v4_single_xlat_config_resp; + +/* + * Bulk Response for the xlat config request + */ +typedef struct _spp_api_cnat_v4_bulk_xlat_config_resp { + u16 _spp_msg_id; + u16 pad; + + u32 bulk_rc; + + u32 num_xlat_entries; + + spp_api_cnat_v4_single_xlat_config_resp xlat_config_resp; + +} spp_api_cnat_v4_bulk_xlat_config_resp; + +typedef struct _spp_api_v6rd_v4_single_v6rd_config_resp { + u8 v6rd_id_rc; + u8 v4_if_num_rc; + u8 v6_if_num_rc; + u8 tunnel_source_rc; + u8 tunnel_mtu_rc; + u8 ipv4masklen_prefix_rc; + u8 ipv4masklen_suffix_rc; + u8 ttl_rc; + u8 tos_rc; + u8 anycast_rc; + u8 v6_prefix_rc; + u8 v6_br_unicast_rc; + u8 reassembly_rc; + u8 pad1; + u8 pad2; + u8 pad3; +} spp_api_v6rd_v4_single_v6rd_config_resp_t; + +typedef struct _spp_api_v6rd_v4_bulk_v6rd_config_resp { + u16 _spp_msg_id; + u16 pad; + u32 bulk_rc; + u32 num_v6rd_entries; + spp_api_v6rd_v4_single_v6rd_config_resp_t v6rd_config_resp[0]; +} 
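+/* Note: v6rd_config_resp[0] is a zero-length (flexible) array member; the
+   response is expected to carry num_v6rd_entries such entries laid out
+   inline after the fixed header. */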
spp_api_v6rd_v4_bulk_v6rd_config_resp_t; + +/* + * Single Request for MAPE config + */ +typedef struct _spp_api_mape_single_config_req { + + /* + * Indicates the mape instance id - How big will this value be + * Can we restrict it between 0..255, that way the APP code + * can use an array to store the xlat instances. + */ + u32 mape_id; + +#define MAPE_ENTRY_DELETE 0x0000 +#define MAPE_IPV4_SVI_IF_NUM_CONFIG 0x0001 +#define MAPE_IPV6_SVI_IF_NUM_CONFIG 0x0002 +#define MAPE_IPV4_TO_IPV6_TCP_MSS_CONFIG 0x0004 +#define MAPE_IPV6_TO_IPV4_TCP_MSS_CONFIG 0x0008 +#define MAPE_CPE_V4_PREFIX_CONFIG 0x0010 +#define MAPE_CPE_V6_PREFIX_CONFIG 0x0020 +#define MAPE_PORT_SHARING_RATIO_CONFIG 0x0040 +#define MAPE_CONSECUTIVE_PORTS_CONFIG 0x0080 +#define MAPE_PATH_MTU 0x0100 +#define MAPE_TUNNEL_ENDPOINT_V6_CONFIG 0x0200 + + u32 mape_config_fields_enable; + + /* + * If numbers of the IPv6 and IPv4 SVI interfaces + */ + u32 ipv6_svi_if_num; + u32 ipv4_svi_if_num; + + /* + * TCP MSS values for the 2 XLAT directions + */ + u16 v4_to_v6_tcp_mss; + u16 v6_to_v4_tcp_mss; + + /* + * Path v6 MTU. + */ + u32 path_mtu; + + /* + * CPE IPv6 prefix and mask len. + */ + u32 cpe_domain_v6_prefix[4]; + u8 cpe_domain_v6_prefix_len; + + /* + * CPE IPv4 prefix and mask len. + */ + u32 cpe_domain_v4_prefix; + u8 cpe_domain_v4_prefix_len; + + /* + * BR IPv6 tunnel end point V6 prefix and mask len. + */ + u32 aftr_tunnel_endpoint_address_v6[4]; + u8 aftr_tunnel_endpoint_address_v6_len; + + /* + * BR IPv6 tunnel end point V6 prefix and mask len. + */ + u8 port_sharing_ratio_bits; + u8 consecutive_ports_bits; + +} spp_api_mape_single_config_req; + + +/* + * Single Response for the mape config response + */ +typedef struct _spp_api_mape_single_config_resp { + u8 v4_if_num_rc; + u8 v6_if_num_rc; + u8 v4_to_v6_tcp_mss_rc; + u8 v6_to_v4_tcp_mss_rc; + u8 mape_id_rc; + u8 path_mtu_rc; + u8 cpe_v6_prefix_rc; + u8 cpe_v4_prefix_rc; + u8 tunnel_endpoint_prefix_rc; + u8 port_sharing_ratio_rc; + u8 port_contiguous_rc; + u8 pad1; +} spp_api_mape_single_config_resp; + +/* + * Bulk Response for the mape config request + */ +typedef struct _spp_api_mape_bulk_config_resp { + u16 _spp_msg_id; + u16 pad; + u32 bulk_rc; + u32 num_mape_entries; + spp_api_mape_single_config_resp mape_config_resp; +} spp_api_mape_bulk_config_resp; + + +#endif /* __CNAT_CONFIG_H__ */ diff --git a/vnet/vnet/vcgn/cnat_config_api.h b/vnet/vnet/vcgn/cnat_config_api.h new file mode 100644 index 00000000000..0789d6a92af --- /dev/null +++ b/vnet/vnet/vcgn/cnat_config_api.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __CNAT_CONFIG_API_H__ +#define __CNAT_CONFIG_API_H__ + +typedef struct _spp_api_cnat_v4_add_vrf_map { + u16 _spp_msg_id; + u8 rc; + u8 pad; + u32 i_vrf_id; + u32 o_vrf_id; + u16 i_vrf; + u16 o_vrf; + u32 start_addr[8]; + u32 end_addr[8]; +} spp_api_cnat_v4_add_vrf_map_t; + +typedef struct _spp_api_cnat_v4_config_nfv9_logging { + u16 _spp_msg_id; + u8 rc; + u8 enable; + u32 ipv4_address; + u32 i_vrf_id; + u16 i_vrf; + u16 port; + u16 refresh_rate; + u16 timeout_rate; + u16 path_mtu; + u8 nfv9_global_collector; + u8 session_logging; +} spp_api_cnat_v4_config_nfv9_logging_t; + + +#endif diff --git a/vnet/vnet/vcgn/cnat_db.h b/vnet/vnet/vcgn/cnat_db.h new file mode 100644 index 00000000000..3596e2384e6 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_db.h @@ -0,0 +1,701 @@ +/* + *------------------------------------------------------------------ + * cnat_db.h - translation database definitions + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __CNAT_DB_H__ +#define __CNAT_DB_H__ + +#include "cnat_cli.h" +#include "cnat_ports.h" +#include "index_list.h" + +#define VRF_NAME_LEN_STORED 12 +#define MAX_VRFID 400 +typedef struct _cnat_svi_params_entry { + u16 svi_type; + u16 pad; + + u32 vrf_id; + u16 if_num; + + u32 ipv6_addr[4]; + u32 ipv4_addr; + + u8 direction; + u32 tbl_id; /* vrf */ + u32 vrf_override_id; /* tbl_id for override vrf */ + u8 vrf_override_flag; + u8 partition_id; +} cnat_svi_params_entry; + +typedef struct _cnat_ingress_vrfid_name_entry { + u32 vrf_id; + u16 ref_count; /*no# of serviceApps under a single vrf*/ + u8 vrf_name[VRF_NAME_LEN_STORED]; + u16 pad1; +} cnat_ingress_vrfid_name_entry; +#define HASH_ENHANCE 4 + +#define CNAT_DB_SIZE (PLATFORM_MAX_NAT_ENTRIES / PLATFORM_CNAT_INSTS) +#define CNAT_MAIN_HASH_SIZE (HASH_ENHANCE * PLATFORM_CNAT_MAIN_PRELIM_HASH_SIZE) +#define CNAT_MAIN_HASH_MASK (CNAT_MAIN_HASH_SIZE-1) + +#define CNAT_USER_DB_SIZE (PLATFORM_MAX_USER_ENTRIES / PLATFORM_CNAT_INSTS) +#define CNAT_USER_HASH_SIZE (HASH_ENHANCE * PLATFORM_CNAT_USER_PRELIM_HASH_SIZE) +#define CNAT_USER_HASH_MASK (CNAT_USER_HASH_SIZE-1) + +#define CNAT_SESSION_DB_SIZE (PLATFORM_MAX_NAT_ENTRIES / PLATFORM_CNAT_INSTS) +#define CNAT_SESSION_HASH_SIZE (HASH_ENHANCE * PLATFORM_CNAT_MAIN_PRELIM_HASH_SIZE) +#define CNAT_SESSION_HASH_MASK (CNAT_SESSION_HASH_SIZE-1) + + +#define CNAT_MAX_SESSIONS_PER_BIB 0xFFFF + +#define NUM_BITS_IN_UWORD (8*sizeof(uword)) + +/* No. of per ip/port config will be limited to 1024 */ +#define CNAT_TIMEOUT_HASH_SIZE 1024 +#define CNAT_TIMEOUT_HASH_MASK (CNAT_TIMEOUT_HASH_SIZE - 1) +#define CNAT_TIMEOUT_FULL_MASK 0xFFFFFFFFFFFFFFFF +#define CNAT_TIMEOUT_IPPROT_MASK PLATFORM_CNAT_TIMEOUT_IPPROT_MASK +#define CNAT_TIMEOUT_PORTPROT_MASK PLATFORM_CNAT_TIMEOUT_PORTPROT_MASK + +#define TRUE 1 +#define FALSE 0 + +/* + * The key structure. All fields are in NETWORK byte order! 
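+ *
+ * The 16-bit vrf member doubles as a protocol field: bits 0-12 carry the
+ * VRF id, bit 13 is unused, and bits 14-15 encode the protocol (see
+ * CNAT_VRF_MASK, CNAT_PRO_MASK and CNAT_PRO_SHIFT below).  For example, a
+ * TCP key in VRF 5 is assembled as
+ *     key.k.vrf = (5 & CNAT_VRF_MASK) | CNAT_TCP;
+ * so the whole key can then be fetched and hashed as the single 64-bit
+ * key64 member of cnat_key_t.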
+ */ +typedef struct { + u32 ipv4; + u16 port; + u16 vrf; //bit0-12:vrf, bit13:unused, bit14-15:protocol +} cnat_db_key_t; + +/* bit14-15:protocol in cnat_db_key_t */ +#define CNAT_INVALID_PROTO 0x0000 +#define CNAT_PPTP 0x0000 +#define CNAT_UDP 0x4000 +#define CNAT_TCP 0x8000 +#define CNAT_ICMP 0xc000 +#define CNAT_VRF_MASK 0x3fff +#define CNAT_PRO_MASK 0xc000 +#define CNAT_PRO_SHIFT 14 + +/* + * Maximum number of VRF entries supported + */ +#define CNAT_MAX_VRFMAP_ENTRIES (CNAT_VRF_MASK + 1) +/* + * for hashing purposes, fetch the key in one instr. + */ +typedef union { + cnat_db_key_t k; + u64 key64; +} cnat_key_t; + +typedef struct { + cnat_key_t k; + u32 bucket; +} cnat_db_key_bucket_t; + +typedef struct { + u32 ipv6[4]; + cnat_key_t ipv4_key; +} dslite_key_t; + +typedef struct { +/* + cnat_db_key_bucket_t ck; + u32 ipv6[4]; +*/ + dslite_key_t dk; + u32 bucket; +} dslite_db_key_bucket_t; + + +/* Per port/ip timeout related strucutres */ +extern index_slist_t *cnat_timeout_hash; + +typedef struct { + cnat_key_t timeout_key; + u16 timeout_value; +} cnat_timeout_t; + +typedef struct { + cnat_timeout_t t_key; + index_slist_t t_hash; +} cnat_timeout_db_entry_t; + +extern cnat_timeout_db_entry_t *cnat_timeout_db; + +/* + * Main translation database entries. Currently 0x5A = 90 bytes in length. + * Given 20,000,000 entries, it saves nearly 1gb of SDRAM to pack the entries + * and pay the extra prefetch. So, that's what we do. + */ + +typedef struct { + /* 0x00 */ + index_slist_t out2in_hash; /* hash-and-chain, x2 */ + index_slist_t in2out_hash; + + /* 0x08 */ + u16 flags; /* Always need flags... */ +#define CNAT_DB_FLAG_PORT_PAIR (1<<0) +#define CNAT_DB_FLAG_TCP_ACTIVE (1<<1) +#define CNAT_DB_FLAG_ENTRY_FREE (1<<2) +#define CNAT_DB_FLAG_UDP_ACTIVE (1<<3) +#define CNAT_DB_FLAG_STATIC_PORT (1<<4) +/* This alg entry is set for FTP data connection */ +#define CNAT_DB_FLAG_ALG_ENTRY (1<<5) + +/* Will be set for TCP connection with destination port - 1723 + * note - here CNAT_DB_FLAG_TCP_ACTIVE is also set */ +#define CNAT_DB_FLAG_PPTP_TUNNEL_INIT (1<<6) +#define CNAT_DB_FLAG_PPTP_TUNNEL_ACTIVE (1<<7) + +/* for PPTP GRE packtes */ +#define CNAT_DB_FLAG_PPTP_GRE_ENTRY (1<<8) + +/* for PCP support */ +#define CNAT_DB_FLAG_PCPI (1<<9) +#define CNAT_DB_FLAG_PCPE (1<<10) +#define CNAT_PCP_FLAG (CNAT_DB_FLAG_PCPI | CNAT_DB_FLAG_PCPE) + +#define CNAT_TAC_SEQ_MISMATCH (1<<11) +/* This alg entry is set for ftp control connection */ +#define CNAT_DB_FLAG_ALG_CTRL_FLOW (1<<12) + +/* This is for marking the state where connection is closing */ +#define CNAT_DB_FLAG_TCP_CLOSING (1<<13) + +#define CNAT_DB_DSLITE_FLAG (1<<14) +#define CNAT_DB_NAT64_FLAG (1<<15) + + /* 0x0A */ + u16 vrfmap_index; /* index of vrfmap */ + + /* 0x0C */ + u32 user_index; /* index of user that owns this entry */ + + /* 0x10 */ + cnat_key_t out2in_key; /* network-to-user, outside-to-inside key */ + + /* 0x18 */ + cnat_key_t in2out_key; /* user-to-network, inside-to-outside key */ + + /* 0x20 */ + index_dlist_t user_ports; /* per-user translation list */ + + /* 0x28 */ + u32 out2in_pkts; /* pkt counters */ + + /* 0x2C */ + u32 in2out_pkts; + + /* 0x30 */ + u32 entry_expires; /* timestamp used to expire translations */ + + /* 0x34 */ + union { /* used by FTP ALG, pkt len delta due to FTP PORT cmd */ + u16 delta; + i8 alg_dlt[2]; /* two delta values, 0 for previous, 1 for current */ + u16 il; /* Used to indicate if interleaved mode is used + in case of RTSP ALG */ + } alg; + + /* 0x36 */ + u16 timeout; + + /* 0x38 */ + union { + struct 
seq_pcp_t { + u32 tcp_seq_num; /* last tcp (FTP) seq # that has pkt len change due to PORT */ + u32 pcp_lifetime; /* peer and map life time value sent in reply*/ + } seq_pcp; + + /* This is for TCP seq check */ + struct tcp_seq_chk_t { + u32 seq_no; + u32 ack_no; + } tcp_seq_chk; + + /* used for pptp alg entries + 1. only tunnel : prev and next = 0xFFFFFFFF + 2. first gre entry : prev = tunnel db, next = next gre db + 3. last gre entry : prev = previous gre/tunnel db, next= 0xFFFFFFFF; + + *while adding gre entry- updated at the begining of head + *while deleting gre entry - hash look up will be done and prev and next are adjusted + * while deleting need not traverse throufgh the list, as done in index_dlist_remelem + + */ + index_dlist_t pptp_list; + + } proto_data; + + /* 0x40 */ + u32 dst_ipv4; /* pointer to ipv4 dst list, used in evil mode */ + + /* 0x44 */ + u16 dst_port; + + /* 0x46 */ + u16 dslite_nat44_inst_id; + + /* 0x48 */ + u32 session_head_index; + + /* 0x4C */ + u16 nsessions; + + /* 0x4E */ + u8 unused; + + /* 0x4F */ + u8 scale; + + /* 0x50 */ + u32 diff_window; + + /* Sizeof cnat_main_db_entry_t = 0x54 */ +} cnat_main_db_entry_t; + +/* Caution ... + * 1. The size of this structure should be same as that of + * nat64_bib_user_entry_t + * 2. Do not alter the position of first four fields + */ +typedef struct { + /* 0x00 */ + index_slist_t user_hash; /* hash 'n chain bucket chain */ + + /* 0x04 */ + u16 ntranslations; /* translations hold by this user */ + + /* 0x06 */ + u8 icmp_msg_count; /* use to rate limit imcp send to this user */ + + /* 0x07 */ + u8 flags; /* To identfiy whether it is NAT64 or NAT44 etc */ +#define CNAT_USER_DB_NAT44_FLAG 0 +#define CNAT_USER_DB_NAT64_FLAG 1 +#define CNAT_USER_DB_DSLITE_FLAG 2 +#define CNAT_USER_DB_PORT_LIMIT_EXCEEDED 0X80 + + /* 0x08 */ + u32 translation_list_head_index; + + /* 0x0C */ + u32 portmap_index; /* index of bound port-map */ + + /* 0x10 */ + cnat_key_t key; /* For dslite this should store IPv6 address */ + u32 ipv6[4]; // B4 ipv6 address + /* 0x18 */ +#if 0 + u32 temp1; + u32 temp2; + u32 temp3; +#endif + /* 0x28 same as nat64_user_db */ +#ifndef NO_BULK_LOGGING + /* Now adding 8 more bytes for bulk allocation.. This makes it + * 0x30 (48). Added the same to nat64_bib_user_entry_t make the + * the sizes equal. For nat64 stful, we may support bulk allocation + * later. + */ + /* Indicates the currently used bulk port range */ + i16 bulk_port_range_cache[BULK_RANGE_CACHE_SIZE]; +#endif /* #ifndef NO_BULK_LOGGING */ +} cnat_user_db_entry_t; + +/* + * cnat_session_entry_t + * This structure represents the cnat session table. It maintains the + * information about the destination of a given translation (main db) + * There would be entry here only if packets are send to more than 1 destn + * from the same source. 
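+ * The first destination of a translation is held in the main db entry itself
+ * (dst_ipv4 and dst_port); session entries are only created once a second
+ * destination shows up, linked from session_head_index and counted in
+ * nsessions (see cnat_handle_1to2_session() and cnat_add_dest_n_log() below).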
+ */ +typedef struct { + + /* 0x00 */ + index_slist_t cnat_session_hash; + + /* 0x04 */ + u32 main_db_index; /* would point to v4 src transport address */ + + /* 0x08 */ + cnat_key_t v4_dest_key; + + /* 0x10 */ + u16 flags; /* Same as cnat_main_db_t */ + + /* 0x12 */ + u16 timeout; + + /* 0x14 */ + u32 entry_expires; + /* 0x18 */ + index_dlist_t main_list; + /* 0x20 = 32 B */ + + union { /* alg same as cnat_main_db_t */ + u16 delta; + i8 alg_dlt[2]; + u16 il; + } alg; + + /* 0x22 */ + u16 tcp_flags; + + /* 0x24 */ + u32 tcp_seq_num; + + /* 0x28 */ + u32 ack_no; + + /* 0x2C */ + u32 window; + + /* 0x30 */ + u8 scale; + + /* 0x31 */ + u8 pad; + + /* 0x32 */ +} cnat_session_entry_t; + + + +/* + * out2in and in2out hash bucket arrays are simply arrays of index_slist_t's + */ + +typedef enum { + CNAT_DB_CREATE_DEFAULT=0, /* honor cnat_main_db_max_ports_per_user */ + CNAT_DB_CREATE_OVERRIDE, /* just do it. */ +} cnat_db_create_policy_t; + +typedef struct { + cnat_key_t in2out_key; + cnat_key_t out2in_key; + u32 dst_ipv4; /* evil for mode only */ + u16 cnat_instance; + cnat_portmap_t *portmap; + u16 *portmap_inuse; + cnat_main_db_entry_t *db; + cnat_db_create_policy_t policy; + port_pair_t pair_of_ports; +} cnat_db_create_args_t; + +extern cnat_main_db_entry_t *cnat_main_db; +extern cnat_user_db_entry_t *cnat_user_db; +extern cnat_session_entry_t *cnat_session_db; + +#define S_WAO 0 +#define S_WA 1 /* waiting for address pool */ +#define S_WO 2 /* waiting for outside vrf */ +#define S_RUN 3 /* got everything */ +#define S_DEL 4 /* just delete */ + +#define INVALID_UIDX 0xffff /*invalid svi app uidb index */ +#define INVALID_VRFID 0xffffffff /*invalid vrf id */ + +typedef struct { + u16 status; + u16 tcp_mss; //tcp max segment size for this inside vrf */ + u32 delete_time; + u16 i_vrf; //inside SVI uidx + u16 o_vrf; //outside SVI uidx + u32 i_vrf_id; //inside vrf id + u32 o_vrf_id; //outside vrf id + cnat_portmap_v2_t *portmap_list; + u32 nfv9_logging_index; + u32 syslog_logging_index; + u16 ip_n_to_1; +#ifndef NO_BULK_LOGGING + bulk_alloc_size_t bulk_size; +#endif /* #ifndef NO_BULK_LOGGING */ + u32 pcp_server_addr; + u32 pcp_server_port; + + u8 nf_logging_policy; + u8 syslog_logging_policy; + u8 frag_tout; + u32 rseed_ip; + u16 port_limit; + u8 tcp_seq_check_enable; + u8 pad; + u32 tcp_seq_user_window; + u8 filter_policy; + u8 ignore_port; +} cnat_vrfmap_t; + +/* + * When creating cnat_vrfmap entry, ensure that any already + * configured logging info is taken into account + */ +#define CNAT_SET_VRFMAP_NFV9_LOGGING_INDEX(logging_index, i_vrf) \ +do { \ + cnat_nfv9_logging_info_t *my_nfv9_logging_info = 0; \ + pool_foreach (my_nfv9_logging_info, cnat_nfv9_logging_info, ({ \ + if (my_nfv9_logging_info->i_vrf == i_vrf) { \ + logging_index = my_nfv9_logging_info - cnat_nfv9_logging_info; \ + break; \ + } \ + })); \ +while (0) + + +typedef struct { + /* + * spp_ctx_alloc() call failed + */ + u64 nfv9_logging_context_creation_fail_count; + + /* + * Cannot send the existing logging pkt, so cannot create + * any additional packets for logging purposes + */ + u64 nfv9_logging_context_creation_deferred_count; + + /* + * Cannot send the existing logging pkt due to cnat_rewrite_output + * superframe being full. 
+ */ + u64 nfv9_downstream_constipation_count; + + /* + * buffer for spp_ctx_alloc() call failed + */ + u64 nfv9_logging_context_buffer_allocation_fail_count; + +} cnat_global_counters_t; + + +extern cnat_global_counters_t cnat_global_counters; + +extern u16 *cnat_portmap_indices_by_vrf; +extern cnat_vrfmap_t *cnat_portmap_by_vrf; +extern cnat_portmap_t **cnat_portmaps; +extern u16 **cnat_portmaps_inuse; + +extern cnat_vrfmap_t *cnat_map_by_vrf; + +/* + * Special define to indicate that the VRF map index entry is empty + */ +#define VRF_MAP_ENTRY_EMPTY 0xffff +extern u16 vrf_map_array[CNAT_MAX_VRFMAP_ENTRIES]; + +extern cnat_svi_params_entry svi_params_array[CNAT_MAX_VRFMAP_ENTRIES]; +extern cnat_ingress_vrfid_name_entry vrfid_name_map[MAX_VRFID]; + +extern index_slist_t *cnat_out2in_hash; +extern index_slist_t *cnat_in2out_hash; +extern index_slist_t *cnat_user_hash; +extern index_slist_t *cnat_session_hash; + +typedef enum { + CNAT_DB_IN2OUT = 0, + CNAT_DB_OUT2IN, +} cnat_db_which_t; + +typedef enum { + CNAT_NO_ICMP_MSG =0, + CNAT_ICMP_MSG, +} cnat_icmp_msg_t; + +typedef struct { + cnat_errno_t error; + cnat_icmp_msg_t gen_icmp_msg; + u32 svi_addr; +} cnat_gen_icmp_info; + +typedef cnat_vrfmap_t nat64_vrfmap_t; +typedef cnat_portmap_v2_t nat64_portmap_v2_t; + +#define CNAT_V4_GET_HASH(key64, hash, mask) \ + a = key64; \ + b = c = 0x9e3779b97f4a7c13LL; \ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix64(a, b, c); \ + hash = c & mask; + +#define CNAT_V4_GET_SESSION_HASH(main_index, in_addr, port, vrf, hash, mask) \ + a = main_index ^ in_addr ^ port ^ vrf; \ + b = c = 0x9e3779b9; \ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix32(a, b, c); \ + hash = c & mask; + +#define CNAT_V4_GET_FRAG_HASH(key64, key32, hash, mask) \ + a = key64; \ + b = key32; \ + c = 0x9e3779b97f4a7c13LL; \ + hash_mix64(a, b, c); \ + hash = c % mask; + +#define CNAT_DB_UPDATE_IN2OUT_TIMER \ + db->entry_expires = cnat_current_time; \ + db->in2out_pkts++; + +#define CNAT_DB_TIMEOUT_RST(db) \ + if(PREDICT_TRUE(db->entry_expires != 0 )) \ + db->entry_expires = cnat_current_time; + +#define DEBUG_I2O_DROP(debug_flag) \ +if (debug_i_flag & debug_flag) { \ + cnat_db_debug_i2o_drop(&ki); \ +} + + +cnat_main_db_entry_t *cnat_main_db_create (cnat_db_create_args_t *a); +void cnat_main_db_entry_delete(cnat_main_db_entry_t *ep); + +void cnat_delete_main_db_entry(cnat_main_db_entry_t *ep); +void cnat_delete_main_db_entry_v2(cnat_main_db_entry_t *ep); + + +cnat_main_db_entry_t* +cnat_get_main_db_entry(cnat_db_key_bucket_t *ki, + port_pair_t port_type, + cnat_errno_t *error, + cnat_user_db_entry_t ** user_db_entry); + +cnat_main_db_entry_t* +cnat_get_main_db_entry_v2(cnat_db_key_bucket_t *ki, + port_pair_t port_pair_type, + port_type_t port_type, + cnat_gen_icmp_info *info, + cnat_key_t *dest_info); + +cnat_main_db_entry_t* +cnat_create_static_main_db_entry_v2(cnat_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + cnat_vrfmap_t *my_vrfmap, + cnat_gen_icmp_info *info); + +cnat_main_db_entry_t* +cnat_create_main_db_entry_and_hash(cnat_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + cnat_user_db_entry_t *udb); + +cnat_user_db_entry_t* +cnat_user_db_create_entry(cnat_db_key_bucket_t *uki, + u32 portmap_index); + +cnat_user_db_entry_t* +cnat_user_db_lookup_entry(cnat_db_key_bucket_t *uki); + +cnat_main_db_entry_t* +cnat_main_db_lookup_entry(cnat_db_key_bucket_t *ki); + +cnat_main_db_entry_t* +cnat_main_db_lookup_entry_out2in (cnat_db_key_bucket_t *ko); + +void cnat_main_db_entry_dump 
(cnat_main_db_entry_t *db); +void cnat_db_in2out_hash_delete (cnat_main_db_entry_t *ep, cnat_user_db_entry_t *up); +void cnat_db_out2in_hash_delete (cnat_main_db_entry_t *ep); +void cnat_user_db_delete (cnat_user_db_entry_t *up); +void cnat_db_debug_i2o_drop(cnat_db_key_bucket_t *ki); + +/* + * Function to dump the Hash Table that maps if_num to uidb_index + */ +extern void cnat_if_num_hash_table_dump(void); + +#define MAIN_DB_TYPE 0 +#define SESSION_DB_TYPE 1 +u16 query_and_update_db_timeout(void *db, u8 db_type); + +u16 cnat_timeout_db_create (cnat_timeout_t t_entry); +void cnat_timeout_db_delete(cnat_key_t t_key); + +cnat_session_entry_t * +cnat_create_session_db_entry(cnat_key_t *ko, + cnat_main_db_entry_t *bdb, u8 log); + +void cnat_dest_update_main2session(cnat_main_db_entry_t *mdb, + cnat_session_entry_t *sdb); + +cnat_session_entry_t *cnat_handle_1to2_session( + cnat_main_db_entry_t *mdb, + cnat_key_t *dest_info); + +void cnat_add_dest_n_log( + cnat_main_db_entry_t *mdb, + cnat_key_t *dest_info); + +cnat_session_entry_t * + cnat_session_db_lookup_entry(cnat_key_t *ko,u32 main_db_index); + +cnat_session_entry_t * + cnat_session_db_edm_lookup_entry(cnat_key_t *ko, + u32 session_head_index, + u32 main_db_index); + + +typedef struct{ + u32 sessions; + u32 active_translations; + u32 num_dynamic_translations; + u32 num_static_translations; + u64 in2out_drops_port_limit_exceeded; + u64 in2out_drops_system_limit_reached; + u64 in2out_drops_resource_depletion; + u64 no_translation_entry_drops; + u32 num_subscribers; + u32 dummy; + u64 drops_sessiondb_limit_exceeded; +} nat44_dslite_common_stats_t; + +typedef struct { + u32 translation_delete_count; + u32 translation_create_count; + u32 out2in_forwarding_count; +} nat44_dslite_global_stats_t; + +typedef struct { + u64 v4_to_v6_tcp_seq_mismatch_drop_count; + u64 v4_to_v6_tcp_seq_mismatch_count; + u64 v4_to_v6_out2in_session_create_count; + u64 v4_to_v6_end_point_filter_drop_count; +} nat44_counters_stats_t; + +#define NAT44_STATS 0 +#define DSLITE_STATS 1 +extern nat44_dslite_common_stats_t nat44_dslite_common_stats[255]; /* 0 is for nat44 */ +extern nat44_dslite_global_stats_t nat44_dslite_global_stats[2]; /* 0 for nat44 and 1 for dslite */ +extern nat44_counters_stats_t nat44_counters_stats[CNAT_MAX_VRFMAP_ENTRIES];/*For displaying show cgn <cgn-name> inside-vrf <vrf-name> counters */ + +#define NAT44_COMMON_STATS nat44_dslite_common_stats[NAT44_RESERVED_INST_ID] +#define NAT44_GLOBAL_STATS nat44_dslite_global_stats[NAT44_STATS] +#define DSLITE_GLOBAL_STATS nat44_dslite_global_stats[DSLITE_STATS] +#define SESSION_LOG_ENABLE 1 +#define ALG_ENABLED_DB(db) \ + ((db->flags & CNAT_PCP_FLAG) || \ + (db->flags & CNAT_DB_FLAG_ALG_CTRL_FLOW) || \ + (db->flags & (CNAT_DB_FLAG_PPTP_TUNNEL_INIT | \ + CNAT_DB_FLAG_PPTP_TUNNEL_ACTIVE))) + + +#endif /* __CNAT_DB_H__ */ diff --git a/vnet/vnet/vcgn/cnat_db_scanner.c b/vnet/vnet/vcgn/cnat_db_scanner.c new file mode 100644 index 00000000000..6e536d84c79 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_db_scanner.c @@ -0,0 +1,493 @@ +/* + *--------------------------------------------------------------------------- + * cnat_db_scanner.c - cnat_db_scanner dispatch function and initialization + * + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> +#include <vppinfra/string.h> +#include <vppinfra/random.h> +#include <vppinfra/fifo.h> +#include <vppinfra/hash.h> +#include <vppinfra/format.h> + + +#include "cnat_db.h" +#include "cnat_logging.h" +#include "cnat_global.h" +#include "cnat_ipv4_udp.h" +#include "cnat_common_api.h" + +u32 translation_create_count, translation_delete_count; +u32 translation_create_rate, translation_delete_rate; + +u32 in2out_forwarding_count, out2in_forwarding_count; +u32 in2out_forwarding_rate, out2in_forwarding_rate; + +u32 nat44_active_translations; +u32 num_entries; +uword check_these_pool_indices[2*MAX_DB_ENTRY_SELECTED_PER_SCAN]; + +#define CNAT_DB_SCANNER_TURN_ON 5 /* just an arbitary number for easier debugging */ + +//extern u32 pcp_throttle_count; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_db_scanner_main_t; + +cnat_db_scanner_main_t cnat_db_scanner_main; + + +static inline void check_session_for_expiry( + cnat_session_entry_t * sdb, u8 timeout_dirty + /*,dslite_table_entry_t *dslite_entry_ptr*/) +{ + void cnat_delete_session_db_entry (cnat_session_entry_t *ep, u8 log); + /* Tasks - + * 1. Check for expiry for this entry + * 2. Delete if expired + */ + u32 timeout = 0; + + switch(sdb->v4_dest_key.k.vrf & CNAT_PRO_MASK) { + case CNAT_TCP: + if (sdb->flags & CNAT_DB_FLAG_TCP_ACTIVE) { + timeout = sdb->timeout; + if(PREDICT_FALSE(timeout_dirty)) { + timeout = query_and_update_db_timeout( + (void *)sdb, SESSION_DB_TYPE); + } + if(PREDICT_TRUE(timeout == 0)) { + timeout = tcp_active_timeout; + //dslite_entry_ptr->timeout_info.tcp_active_timeout; + } + } else { + timeout = tcp_initial_setup_timeout; + //dslite_entry_ptr->timeout_info.tcp_initial_setup_timeout; + } + break; + case CNAT_UDP: + if (sdb->flags & CNAT_DB_FLAG_UDP_ACTIVE) { + timeout = sdb->timeout; + if(PREDICT_FALSE(timeout_dirty)) { + timeout = query_and_update_db_timeout( + (void *)sdb, SESSION_DB_TYPE); + } + + if(PREDICT_TRUE(timeout == 0)) { + timeout = udp_act_session_timeout; + //dslite_entry_ptr->timeout_info.udp_act_session_timeout; + } + } else { + timeout = udp_init_session_timeout; + //dslite_entry_ptr->timeout_info.udp_init_session_timeout; + } + break; + case CNAT_ICMP: + timeout = icmp_session_timeout; + //dslite_entry_ptr->timeout_info.icmp_session_timeout; + break; + case CNAT_PPTP: + timeout = pptp_cfg.timeout; + break; + default: + return; + } + /* Changes required for clearing sessions */ + if (PREDICT_FALSE((sdb->entry_expires == 0) || + (sdb->entry_expires + timeout < cnat_current_time))) { + cnat_delete_session_db_entry(sdb, TRUE); + } +} + +static u8 handle_db_scan_for_sessions( + cnat_main_db_entry_t *db, int *dirty_index, uword db_index + /* ,dslite_table_entry_t *dslite_entry_ptr */) +{ + /* Tasks - + * 1. Traverse through the sessions and check for timeouts + * 2. 
Delete sessions that have exipred + * 3. Check if the db has only one session remaining.. if so, + * the details of the session has to be moved to main db + * and session db entry needs to be freed + * 4. If db does not have any sessions left, the db itself + * needs to be deleted. + */ + u32 nsessions, session_index_head, session_index; + cnat_session_entry_t *sdb; + u8 timeout_dirty = FALSE; + + if(PREDICT_FALSE(*dirty_index == db_index)) { + *dirty_index = -1; + } + if(PREDICT_FALSE(timeout_dirty_flag == 1)) { + timeout_dirty_flag = 0; + *dirty_index = db_index; + timeout_dirty = TRUE; + } + + session_index_head = session_index = db->session_head_index; + nsessions = db->nsessions; + + do { + sdb = cnat_session_db + session_index; + if(PREDICT_FALSE(!sdb)) { + //TO DO: Debug msg? + return FALSE; + } + session_index = sdb->main_list.next; + check_session_for_expiry(sdb, timeout_dirty /*,dslite_entry_ptr*/); + nsessions--; /* To ensure that we do not get in to an infinite loop */ + } while(session_index != session_index_head + && db->session_head_index != EMPTY && + nsessions); + + /* Note.. the code below assumes that while deleting the + * sessions, we do not delete the main db entry if it does + * not have any sessions anymore + */ + if(PREDICT_FALSE((!db->nsessions) && + (!(db->flags & CNAT_DB_FLAG_STATIC_PORT)))) { + cnat_delete_main_db_entry_v2(db); + return TRUE; /* to indicate that main db was deleted */ + } + return FALSE; +} + +static void cnat_db_scanner(void) +{ + cnat_main_db_entry_t * db; + u32 timeout; + cnat_vrfmap_t *my_vrfmap __attribute__((unused)) = 0; + static int dirty_index = -1; + u16 instance __attribute__((unused)); + //dslite_table_entry_t *dslite_entry_ptr; + u32 i; + uword db_index; + //pcp_throttle_count = 0; + + for (i = 0; i < num_entries; i++) { + db_index = check_these_pool_indices[i]; + db = cnat_main_db + db_index; + timeout=0; + my_vrfmap = 0; + +#if 0 + if(PREDICT_FALSE(db->flags & CNAT_PCP_FLAG)) { + + if(db->proto_data.seq_pcp.pcp_lifetime < cnat_current_time) { + /* mark as implicit */ + db->flags &= ~CNAT_PCP_FLAG; + } + continue; + } + +#endif + if(PREDICT_FALSE(db->nsessions > 1)) { + if(PREDICT_FALSE( + handle_db_scan_for_sessions(db, &dirty_index, db_index /*,dslite_entry_ptr */))) { + continue; + } else if(PREDICT_TRUE(db->nsessions > 1)) { + continue; + } + /* if there is exactly one dest left.. 
let it fall through + * and check if that needs to be deleted as well + */ + } + +#if 0 + if (PREDICT_FALSE(db->flags & CNAT_DB_FLAG_STATIC_PORT)) { + if (PREDICT_FALSE(db->flags & CNAT_DB_DSLITE_FLAG)) { + if(PREDICT_FALSE( + ((dslite_entry_ptr->nf_logging_policy != SESSION_LOG_ENABLE) && + (dslite_entry_ptr->syslog_logging_policy != SESSION_LOG_ENABLE)) + || (db->nsessions !=1))) { + continue; + } + } else { + my_vrfmap = cnat_map_by_vrf + db->vrfmap_index; + if(PREDICT_FALSE( + ((my_vrfmap->nf_logging_policy != SESSION_LOG_ENABLE) && + (my_vrfmap->syslog_logging_policy != SESSION_LOG_ENABLE)) || + (db->nsessions !=1))) { + continue; + } + } + } +#endif + + switch(db->in2out_key.k.vrf & CNAT_PRO_MASK) { + case CNAT_TCP: + if (db->flags & CNAT_DB_FLAG_TCP_ACTIVE) { + timeout = db->timeout; + if(PREDICT_FALSE(dirty_index == db_index)) { + dirty_index = -1; + } + if(PREDICT_FALSE(timeout_dirty_flag == 1)) { + timeout_dirty_flag = 0; + dirty_index = db_index; + } + if(PREDICT_FALSE(dirty_index != -1)) { + timeout = query_and_update_db_timeout( + (void *)db, MAIN_DB_TYPE); + } + if(PREDICT_TRUE(timeout == 0)) { + timeout = tcp_active_timeout; + } + } else { + timeout = tcp_initial_setup_timeout; + } + break; + case CNAT_UDP: + if (db->flags & CNAT_DB_FLAG_UDP_ACTIVE) { + timeout = db->timeout; + if(PREDICT_FALSE(dirty_index == db_index)) { + dirty_index = -1; + } + if(PREDICT_FALSE(timeout_dirty_flag == 1)) { + timeout_dirty_flag = 0; + dirty_index = db_index; + } + if(PREDICT_FALSE(dirty_index != -1)) { + timeout = query_and_update_db_timeout( + (void *)db, MAIN_DB_TYPE); + } + if(PREDICT_TRUE(timeout == 0)) { + timeout = udp_act_session_timeout; + } + } else { + timeout = udp_init_session_timeout; + } + break; + case CNAT_ICMP: + timeout = icmp_session_timeout; + break; + case CNAT_PPTP: + timeout = pptp_cfg.timeout; + break; + default: + continue; + } + + + /* Ref: CSCtu97536 */ + if (PREDICT_FALSE((db->entry_expires == 0) || + (db->entry_expires + timeout < cnat_current_time))) { +#if 0 + if (PREDICT_FALSE(db->flags & CNAT_DB_FLAG_STATIC_PORT)) { + if (PREDICT_FALSE(db->flags & CNAT_DB_DSLITE_FLAG)) { + instance = db->dslite_nat44_inst_id; + } else { + instance = NAT44_RESERVED_INST_ID; + cnat_session_log_nat44_mapping_delete(db, 0, my_vrfmap); + } + + /* Reset the session details */ + db->nsessions = 0; + db->dst_ipv4 = 0; + db->dst_port = 0; + db->flags &= ~(CNAT_DB_FLAG_TCP_ACTIVE | CNAT_DB_FLAG_UDP_ACTIVE + | CNAT_DB_FLAG_ALG_ENTRY); + db->timeout = 0; + db->entry_expires = 0; + db->alg.delta = 0; + db->proto_data.seq_pcp.tcp_seq_num = 0; + continue; + } +#endif + //printf("DELETING DB ENTRY FOR 0x%x\n", db->in2out_key.k.ipv4); + cnat_delete_main_db_entry_v2(db); + } + //free(check_these_pool_indices[i]); + } +} + +static void walk_the_db (void) +{ + pool_header_t *h = pool_header(cnat_main_db); + u32 db_uword_len; + static u32 base_index = 0, free_bitmap_index = 0; + int bits_scanned = 0, i; + uword inuse_bitmap; + + num_entries=0; + + /* Across all db entries... */ + db_uword_len = vec_len(cnat_main_db) / NUM_BITS_IN_UWORD; + if (PREDICT_FALSE(vec_len(cnat_main_db) % NUM_BITS_IN_UWORD)) { + /* + * It should not come here as in cnat_db_init_v2() + * it is made multiple of NUM_BITS_IN_UWORD + */ + ASSERT(0); + return ; + } + + if (PREDICT_FALSE(! 
db_uword_len)) + return ; + + while (bits_scanned < MAX_DB_ENTRY_PER_SCAN) { + + if (PREDICT_FALSE(free_bitmap_index < vec_len(h->free_bitmap))) { + + /* free_bitmap exists and it is not all 0 */ + + inuse_bitmap = ~(h->free_bitmap[free_bitmap_index]); + i = 0; + while (inuse_bitmap) { + + /* Check to see if the index is in use */ + if (PREDICT_FALSE((inuse_bitmap >> i) & 1)) { + check_these_pool_indices[num_entries] = base_index + i; + inuse_bitmap &= ~((uword) 1 << i); + num_entries++; + } + i++; + } // while (inuse_bitmap) + } else { + + /* + * 64-bit entry is 0, means all 64 entries are allocated. + * So, simply add all 64 entries here. + * No need to form inuse_bitmap, check and reset bits + */ + for (i=0; i<NUM_BITS_IN_UWORD; i++) { + + check_these_pool_indices[num_entries] = base_index + i; + num_entries++; + } + } // if (free_bitmap_index < vec_len(h->free_bitmap)) + + /* Update free_bitmap_index and base_index for next run */ + if (PREDICT_FALSE(free_bitmap_index == db_uword_len - 1)) { + /* wrap-around for next run */ + free_bitmap_index = 0; + base_index = 0; + } else { + free_bitmap_index ++; + base_index += NUM_BITS_IN_UWORD; + } + + /* increment # of bits scanned */ + bits_scanned += NUM_BITS_IN_UWORD; + + /* Found enough entries to check ? */ + if (PREDICT_FALSE(num_entries >= MAX_DB_ENTRY_SELECTED_PER_SCAN)) + { + /* This check is introduced to keep fixed MAX scan entry value */ + /* This is very much required when we do scanning for NAT64 */ + /* please check comments in cnat_db_scanner() & + * handler_nat64_db_scanner() */ + if (num_entries >= MAX_COMBINED_DB_ENTRIES_PER_SCAN) { + num_entries = MAX_COMBINED_DB_ENTRIES_PER_SCAN; + } + break; + } + + } // while (bits_scanned < MAX_DB_ENTRY_PER_SCAN) + + if (PREDICT_FALSE(num_entries > 0)) { + //printf("%s: num_entries [%d]\n", __func__, num_entries); + cnat_db_scanner(); + } + return ; +} + +static uword cnat_db_scanner_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + f64 timeout = 0.01; /* timeout value in sec (10 ms) */ + static u8 timeout_count = 0; + + uword event_type; + uword * event_data = 0; + /* Wait until vCGN is configured */ + while (1) { + /* Assigning a huge timeout value, vCGN may or + * may not get configured within this timeout */ + vlib_process_wait_for_event_or_clock (vm, 1e9); + event_type = vlib_process_get_events (vm, &event_data); + + /* check whether the process is waken up by correct guy, + * otherwise continue waiting for the vCGN config */ + if (event_type == CNAT_DB_SCANNER_TURN_ON) { + break; + } + } + + while(1) { + vlib_process_suspend(vm, timeout); + + /* Above suspend API should serve the purpose, no need to invoke wait API */ + /* vlib_process_wait_for_event_or_clock (vm, timeout); */ + + /* Lets make use of this timeout for netflow packet sent */ + if (timeout_count < 100) { /* 100*10 ms = 1 sec */ + timeout_count++; + } else { + if (nfv9_configured) { + handle_pending_nfv9_pkts(); + } + timeout_count = 0; + } + /* Do we need this ? 
*/ + //event_type = vlib_process_get_events (vm, &event_data); + cnat_current_time = (u32)vlib_time_now (vm); + if (cnat_db_init_done) { + walk_the_db(); + } + } + + return 0; +} + + +VLIB_REGISTER_NODE (cnat_db_scanner_node) = { + .function = cnat_db_scanner_fn, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "cnat-db-scanner", + .process_log2_n_stack_bytes = 18, +}; + +clib_error_t *cnat_db_scanner_init (vlib_main_t *vm) +{ + cnat_db_scanner_main_t *mp = &cnat_db_scanner_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +void cnat_scanner_db_process_turn_on(vlib_main_t *vm) +{ + vlib_process_signal_event (vm, cnat_db_scanner_node.index, + CNAT_DB_SCANNER_TURN_ON, 0); + return; +} + +VLIB_INIT_FUNCTION (cnat_db_scanner_init); + diff --git a/vnet/vnet/vcgn/cnat_db_v2.c b/vnet/vnet/vcgn/cnat_db_v2.c new file mode 100644 index 00000000000..c09a73ebb15 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_db_v2.c @@ -0,0 +1,3716 @@ +/* + *------------------------------------------------------------------ + * cnat_db_v2.c - translation database definitions + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/clib.h> +#include <vppinfra/error.h> + +#include "cnat_db.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_v4_functions.h" +#include "cnat_log_api.h" +#include "cnat_cli.h" +#include "spp_platform_trace_log.h" +#include "cnat_bulk_port.h" +#include "nat64_db.h" +#include "dslite_db.h" +#include "cnat_config_api.h" + +#define HASH_TABLE_SIZE 8192 // hash table size +#define THROTTLE_TIME 180 // throttle time value for out of port msg/user + +u8 cnat_db_init_done = 0; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_db_v2_main_t; + +cnat_db_v2_main_t cnat_db_v2_main; + +#if 1 +/* TOBE_PORTED : Remove the following once fixed */ +#undef PREDICT_TRUE +#undef PREDICT_FALSE +#define PREDICT_TRUE(x) (x) +#define PREDICT_FALSE(x) (x) +#endif + +#define foreach_cnat_db_v2_error \ +_(DROP, "error-drop packets") + +typedef enum { +#define _(sym,str) CNAT_DB_V2_##sym, + foreach_cnat_db_v2_error +#undef _ + CNAT_DB_V2_N_ERROR, +} cnat_db_v2_error_t; + +static char * cnat_db_v2_error_strings[] __attribute__((unused)) = { +#define _(sym,string) string, + foreach_cnat_db_v2_error +#undef _ +}; + + +void cnat_table_entry_fill_map(u32 start_addr, u32 end_addr, + cnat_portmap_v2_t **port_map_holder) +{ + u32 this_start_addr, this_end_addr, this_addr, new; + u32 loop_count; + u32 pm_len, i; + cnat_portmap_v2_t *my_pm =0; + cnat_portmap_v2_t *pm = 0; + + my_instance_number = 0; + + this_start_addr = start_addr; + this_end_addr = end_addr; + + /* + 
* How many new addresses are getting added ?? + */ + /* commenting this. Right now end - start will be for this vCGN instance */ + //new = ((this_end_addr - this_start_addr) / MAX_CORES_PER_PARTITION) + 1; + new = (this_end_addr - this_start_addr) + 1; + + pm = *port_map_holder; + pm_len = vec_len(pm); +#if DEBUG_NOT_COMMENTED + printf("this_start_addr = 0x%08X, this_end_addr = 0x%08X, Num Addr = %d\n", + this_start_addr, this_end_addr, new); + printf("pm_len = %d\n", pm_len); +#endif + /* Check whether the address pool add requested already exists */ + my_pm = pm; + for(i = 0; i< pm_len; i++) { + if(my_pm->ipv4_address == this_start_addr) { + printf("address pool with addr 0x%08X exists\n", this_start_addr); + return; + } + my_pm++; + } + + /* + * For now give a warning message only.... + */ +#if 0 + if ((total_address_pool_allocated + new) > + CNAT_MAX_ADDR_POOL_SIZE_PER_CORE) { + printf("address pool size (%d) would cross permissible limit (%u) \n", + (total_address_pool_allocated + new), + CNAT_MAX_ADDR_POOL_SIZE_PER_CORE); + } +#endif + + total_address_pool_allocated += new; + vec_add2(pm, my_pm, new); + +#if DEBUG_NOT_COMMENTED + printf("total_address_pool_allocated changed from %d to %d (added %d)", + (total_address_pool_allocated - new), + total_address_pool_allocated, new); + printf("vec add is ok\n"); +#endif + + memset(my_pm, 0, new*sizeof(*my_pm)); + this_addr = this_start_addr; + loop_count = 0; /* Sanity counter */ + + while (this_addr <= this_end_addr) { +#if DEBUG_NOT_COMMENTED + printf("loop %d: this addr = 0x%08X\n", loop_count+1, this_addr); +#endif + my_pm->ipv4_address = this_addr; + /* + * Set all bits to "1" indicating all ports are free + */ + memset(my_pm->bm, 0xff, + (((BITS_PER_INST + BITS(uword)-1)/BITS(uword))*(sizeof(uword)))); + //this_addr += MAX_CORES_PER_PARTITION; + this_addr += 1; + my_pm++; + loop_count++; + } + /* + * We should have loop_count same as the new value + */ + if (loop_count != new) { + printf("Mismatch in loop_count (%d) != new (%d)\n", + loop_count, new); + } + + *port_map_holder = pm; + +#if DEBUG_NOT_COMMENTED + printf("revised pm len %d\n", vec_len(*port_map_holder)); +#endif + + return; +} + + +void cnat_delete_session_db_entry (cnat_session_entry_t *ep, u8 log); +inline void handle_cnat_port_exceeded_logging( + cnat_user_db_entry_t *udb, + cnat_key_t * key, + cnat_vrfmap_t *vrfmap); + +cnat_global_counters_t cnat_global_counters; +u32 last_log_timestamp = 0; +u32 last_user_dyn_port_exc_timestamp = 0; +u32 last_user_stat_port_exc_timestamp = 0; + +index_slist_t *cnat_out2in_hash; +index_slist_t *cnat_in2out_hash; +index_slist_t *cnat_user_hash; +index_slist_t *cnat_timeout_hash; +index_slist_t *cnat_session_hash; + +cnat_main_db_entry_t *cnat_main_db; +cnat_user_db_entry_t *cnat_user_db; +cnat_session_entry_t *cnat_session_db; +cnat_timeout_db_entry_t *cnat_timeout_db; + +cgse_nat_db_entry_t *cgse_nat_db; +cgse_nat_user_db_entry_t *cgse_user_db; +cgse_nat_session_db_entry_t *cgse_session_db; + +nat44_dslite_common_stats_t nat44_dslite_common_stats[255]; /* 0 is for nat44 */ +nat44_dslite_global_stats_t nat44_dslite_global_stats[2]; /* 0 for nat44 and 1 for dslite */ +nat44_counters_stats_t nat44_counters_stats[CNAT_MAX_VRFMAP_ENTRIES]; +/*For displaying show cgn <cgn-name> inside-vrf <vrf-name> counters */ + +/* + * This is the pool of vrf map structures used by latest main-db functions + */ +cnat_vrfmap_t *cnat_map_by_vrf; + +/* + * Have a mapping table of vrf_id-->vrf_map_index + * This helps in easily getting the vrf_map structure 
during + * main-db create paths + */ +u16 vrf_map_array[CNAT_MAX_VRFMAP_ENTRIES]; +cnat_svi_params_entry svi_params_array[CNAT_MAX_VRFMAP_ENTRIES]; +cnat_ingress_vrfid_name_entry vrfid_name_map[MAX_VRFID] = {{0}}; +u64 in2out_drops_port_limit_exceeded; +u64 in2out_drops_system_limit_reached; +u64 in2out_drops_resource_depletion; +u64 no_translation_entry_drops; +u32 no_sessions; + +#define CNAT_SET_ICMP_MSG_INFO \ +if (PREDICT_TRUE((my_vrfmap->i_vrf < CNAT_MAX_VRFMAP_ENTRIES) && \ + (svi_params_array[my_vrfmap->i_vrf].ipv4_addr))) { \ + info->gen_icmp_msg = icmp_msg_gen_allowed(); \ + info->svi_addr = svi_params_array[my_vrfmap->i_vrf].ipv4_addr; \ +} + +#define CNAT_DEBUG_INSIDE_ERR(err) \ +if (((protocol == CNAT_UDP) && \ + (debug_i_flag & CNAT_DEBUG_ERR_UDP)) || \ + ((protocol == CNAT_TCP) && \ + (debug_i_flag & CNAT_DEBUG_ERR_TCP)) || \ + ((protocol == CNAT_ICMP) && \ + (debug_i_flag & CNAT_DEBUG_ERR_ICMP))) { \ + cnat_db_debug_error(&u_ki, err); \ +} + +#define DSLITE_DEBUG_INSIDE_ERR(err) \ +if (((protocol == CNAT_UDP) && \ + (debug_i_flag & CNAT_DEBUG_ERR_UDP)) || \ + ((protocol == CNAT_TCP) && \ + (debug_i_flag & CNAT_DEBUG_ERR_TCP)) || \ + ((protocol == CNAT_ICMP) && \ + (debug_i_flag & CNAT_DEBUG_ERR_ICMP))) { \ + dslite_db_debug_error(&u_ki, err); \ +} + +#define PORT_LIMIT_LOW_THRESHOLD_FOR_SYSLOG 7 +/* If the max_limit is less than 10, no meaningful throttling can be + * done.. so, log only once per user and never clear the flag + * once the user exceeds limit + */ +#define CHECK_CLEAR_PORT_LIMIT_EXCEED_FLAG(udb, max_limit) \ + if(PREDICT_FALSE(udb->flags & CNAT_USER_DB_PORT_LIMIT_EXCEEDED)) { \ + if(udb->ntranslations < \ + ((max_limit/10)*PORT_LIMIT_LOW_THRESHOLD_FOR_SYSLOG) && \ + max_limit >= 10) { \ + udb->flags = udb->flags & (~CNAT_USER_DB_PORT_LIMIT_EXCEEDED); \ + } \ + } + +#ifdef TOBE_PORTED +/* Commented to remove unused variable warning */ +static char *debug_db_error[] = { + "no error", /* CNAT_SUCCESS */ + "no config", /*CNAT_NO_CONFIG*/ + "not in run state", /*CNAT_NO_VRF_RUN*/ + "no pool for any", /*CNAT_NO_POOL_ANY*/ + "no port for any", /*CNAT_NO_PORT_ANY*/ + "bad in use for any", /*CNAT_BAD_INUSE_ANY*/ + "not found for any", /*CNAT_NOT_FOUND_ANY*/ + "invalid index for direct", /*CNAT_INV_PORT_DIRECT*/ + "deleted addr for direct", /*CNAT_DEL_PORT_DIRECT*/ + "bad in use for direct",/*CNAT_BAD_INUSE_DIRECT*/ + "not found for direct",/*CNAT_NOT_FOUND_DIRECT*/ + "out of port limit", /*CNAT_OUT_LIMIT*/ + "main db limit", /*CNAT_MAIN_DB_LIMIT*/ + "user db limit", /*CNAT_USER_DB_LIMIT*/ + "not static port", /*CNAT_NOT_STATIC_PORT*/ + "bad static port request", /*CNAT_BAD_STATIC_PORT_REQ*/ + "not this core", /*CNAT_NOT_THIS_CORE*/ + "parser error", /*CNAT_ERR_PARSER*/ + "invalid msg id", /*CNAT_ERR_INVALID_MSG_ID*/ + "invalid msg size", /*CNAT_ERR_INVALID_MSG_SIZE*/ + "invalid payload size", /*CNAT_ERR_INVALID_PAYLOAD_SIZE*/ + "bad tcp udp port", /*CNAT_ERR_BAD_TCP_UDP_PORT*/ + "bulk single failure", /*CNAT_ERR_BULK_SINGLE_FAILURE*/ + "xlat id invalid", /*CNAT_ERR_XLAT_ID_INVALID*/ + "xlat v6 prefix invalid", /*CNAT_ERR_XLAT_V6_PREFIX_INVALID*/ + "xlat v4 prefix invalid", /*CNAT_ERR_XLAT_V4_PREFIX_INVALID*/ + "xlat tcp mss invalid", /*CNAT_ERR_XLAT_TCP_MSS_INVALID*/ + "6rd id invalid", /*CNAT_ERR_6RD_ID_INVALID*/ + "6rd v4 tunnel src invalid", /*CNAT_ERR_6RD_V4_TUNNEL_SRC_INVALID*/ + "6rd v6 prefix invalid", /*CNAT_ERR_6RD_V6_PREFIX_INVALID*/ + "6rd v6 BR unicast invalid", /*CNAT_ERR_6RD_V6_BR_UNICAST_INVALID*/ + "6rd v4 prefix masklen invalid", 
/*CNAT_ERR_6RD_V4_PREFIX_MASK_LEN_INVALID*/ + "6rd v4 suffix masklen invalid", /*CNAT_ERR_6RD_V4_SUFFIX_MASK_LEN_INVALID*/ + "6rd v4 combo masklen invalid", /*CNAT_ERR_6RD_V4_COMBO_MASK_LEN_INVALID*/ + "6rd tunnel mtu invalid", /*CNAT_ERR_6RD_TUNNEL_MTU_INVALID*/ + "6rd tunnel ttl invalid", /*CNAT_ERR_6RD_TUNNEL_TTL_INVALID*/ + "6rd tunnel tos invalid", /*CNAT_ERR_6RD_TUNNEL_TOS_INVALID*/ +}; +#endif + +f64 port_log_timestamps[HASH_TABLE_SIZE]; /* 32 KB array per core */ + +void port_exceeded_msg_log (u32 src_addr, u16 i_vrf) +{ + u32 hash_value; + f64 current_timestamp; + vlib_main_t *vlib_main; + + vlib_main = vlib_get_main(); + current_timestamp = vlib_time_now((vlib_main_t *) vlib_main); + + hash_value = ((src_addr >> 16) ^ ((src_addr & 0xffff) ^ i_vrf)) % (1024*8); + + if (PREDICT_FALSE((current_timestamp - port_log_timestamps[hash_value]) > THROTTLE_TIME)) { + u32 arg[2] = {i_vrf, src_addr}; + /* update timestamp */ + port_log_timestamps[hash_value] = current_timestamp; + spp_printf(CNAT_USER_OUT_OF_PORTS, 2, arg); + } + + return ; +} + +static void log_port_alloc_error(cnat_errno_t error, cnat_key_t *k) +{ + u32 error_code; + u32 arr[] = {k->k.vrf, k->k.ipv4, k->k.port}; + switch (error) + { + case CNAT_NO_POOL_ANY: + error_code = CNAT_NO_POOL_FOR_ANY_ERROR; + break; + case CNAT_NO_PORT_ANY: + error_code = CNAT_NO_PORT_FOR_ANY_ERROR; + break; + case CNAT_ERR_PARSER: + error_code = CNAT_WRONG_PORT_ALLOC_TYPE; + break; + case CNAT_BAD_INUSE_ANY: + error_code = CNAT_BAD_INUSE_ANY_ERROR; + break; + case CNAT_BAD_INUSE_DIRECT: + error_code = CNAT_BAD_INUSE_DIRECT_ERROR; + break; + case CNAT_NOT_FOUND_ANY: + error_code = CNAT_NOT_FOUND_ANY_ERROR; + break; + case CNAT_NOT_FOUND_DIRECT: + error_code = CNAT_NOT_FOUND_DIRECT_ERROR; + break; + case CNAT_INV_PORT_DIRECT: + error_code = CNAT_INV_PORT_FOR_DIRECT_ERROR; + break; + default: + error_code = CNAT_NEW_PORT_ALLOC_ERROR; /* If this code is seen in the log, + it means, new error codes are to be added here */ + break; + } + spp_printf(error_code, 3, arr); +} + +void cnat_db_debug_error(cnat_db_key_bucket_t *u_ki, + cnat_errno_t error) +{ + if (PREDICT_FALSE((u_ki->k.k.vrf == debug_i_vrf) && + ((u_ki->k.k.ipv4 >= debug_i_addr_start) && + (u_ki->k.k.ipv4 <= debug_i_addr_end)))) { +#ifdef DEBUG_PRINTF_ENABLED + PLATFORM_DEBUG_PRINT("failed to allocate port due to %s " + "for i-vrf 0x%x addr 0x%x port 0x%x\n", + debug_db_error[error], u_ki->k.k.vrf, + u_ki->k.k.ipv4, u_ki->k.k.port); +#endif + { + u32 arg[] = {u_ki->k.k.vrf, u_ki->k.k.ipv4, u_ki->k.k.port}; + spp_printf(error, 3, arg); + } + } +} + +void dslite_db_debug_error(dslite_db_key_bucket_t *u_ki, + cnat_errno_t error) +{ + if (PREDICT_FALSE((u_ki->dk.ipv4_key.k.vrf == debug_i_vrf) && + ((u_ki->dk.ipv4_key.k.ipv4 >= debug_i_addr_start) && + (u_ki->dk.ipv4_key.k.ipv4 <= debug_i_addr_end)))) { +#ifdef DEBUG_PRINTF_ENABLED + PLATFORM_DEBUG_PRINT("failed to allocate port due to %s " + "for i-vrf 0x%x addr 0x%x port 0x%x\n", + debug_db_error[error], u_ki->dk.ipv4_key.k.vrf, + u_ki->dk.ipv4_key.k.ipv4, u_ki->dk.ipv4_key.k.port); +#endif + { + u32 arg[] = {u_ki->dk.ipv4_key.k.vrf, u_ki->dk.ipv4_key.k.ipv4, u_ki->dk.ipv4_key.k.port}; + spp_printf(error, 3, arg); + } + } +} + +void cnat_db_debug_i2o_drop(cnat_db_key_bucket_t *ki) +{ + if (PREDICT_FALSE(((ki->k.k.vrf & CNAT_VRF_MASK) == debug_i_vrf) && + ((ki->k.k.ipv4 >= debug_i_addr_start) && + (ki->k.k.ipv4 <= debug_i_addr_end)))) { +#ifdef DEBUG_PRINTF_ENABLED + PLATFORM_DEBUG_PRINT("pakcet[i-vrf 0x%x addr 0x%x port 0x%x] dropped\n", + 
ki->k.k.vrf, ki->k.k.ipv4, ki->k.k.port); +#endif + { + u32 arg[] = {ki->k.k.vrf, ki->k.k.ipv4, ki->k.k.port}; + spp_printf(CNAT_PACKET_DROP_ERROR, 3, arg); + } + } +} + +void cnat_db_in2out_hash_delete (cnat_main_db_entry_t *ep, cnat_user_db_entry_t *up) +{ + u64 a, b, c; + u32 index, bucket; + cnat_main_db_entry_t *this, *prev; + +#ifdef DSLITE_DEF + if (PREDICT_FALSE(ep->flags & CNAT_DB_DSLITE_FLAG)) { + dslite_key_t dk = { + {up->ipv6[0], up->ipv6[1], up->ipv6[2], up->ipv6[3]} , + {ep->in2out_key.k.ipv4, ep->in2out_key.k.port, ep->in2out_key.k.vrf} + }; + DSLITE_V6_GET_HASH((&dk), + bucket, + CNAT_MAIN_HASH_MASK); + DSLITE_PRINTF(1, "Delete1 DSL main hash bucket ..%u\n", bucket); + } else { + CNAT_V4_GET_HASH(ep->in2out_key.key64, + bucket, CNAT_MAIN_HASH_MASK) + DSLITE_PRINTF(1, "Delete1 NAT44 main hash bucket ..%u\n", bucket); + } +#else + CNAT_V4_GET_HASH(ep->in2out_key.key64, + bucket, CNAT_MAIN_HASH_MASK) +#endif + + index = cnat_in2out_hash[bucket].next; + + ASSERT(index != EMPTY); + + prev = 0; + do { + this = cnat_main_db + index; + if (PREDICT_TRUE(this == ep)) { + if (prev == 0) { + cnat_in2out_hash[bucket].next = ep->in2out_hash.next; + return; + } else { + prev->in2out_hash.next = ep->in2out_hash.next; + return; + } + } + prev = this; + index = this->in2out_hash.next; + } while (index != EMPTY); + + ASSERT(0); +} + +void cnat_db_out2in_hash_delete (cnat_main_db_entry_t *ep) +{ + u64 a, b, c; + u32 index, bucket; + cnat_main_db_entry_t *this, *prev; + + CNAT_V4_GET_HASH(ep->out2in_key.key64, + bucket, CNAT_MAIN_HASH_MASK) + + index = cnat_out2in_hash[bucket].next; + + ASSERT(index != EMPTY); + + prev = 0; + do { + this = cnat_main_db + index; + if (PREDICT_TRUE(this == ep)) { + if (prev == 0) { + cnat_out2in_hash[bucket].next = ep->out2in_hash.next; + return; + } else { + prev->out2in_hash.next = ep->out2in_hash.next; + return; + } + } + prev = this; + index = this->out2in_hash.next; + } while (index != EMPTY); + + ASSERT(0); +} + +cnat_main_db_entry_t* +cnat_main_db_lookup_entry(cnat_db_key_bucket_t *ki) +{ + u64 a, b, c; + u32 index; + cnat_main_db_entry_t *db; + + CNAT_V4_GET_HASH(ki->k.key64, + ki->bucket, + CNAT_MAIN_HASH_MASK); + + index = cnat_in2out_hash[ki->bucket].next; + if (PREDICT_TRUE(index == EMPTY)) { + return (NULL); + } + + do { + db = cnat_main_db + index; + if (PREDICT_TRUE(db->in2out_key.key64 == ki->k.key64)) { + return db; + } + index = db->in2out_hash.next; + } while (index != EMPTY); + + return (NULL); +} + +void cnat_user_db_delete (cnat_user_db_entry_t *up) +{ + u64 a, b, c; + u32 index, bucket; + cnat_user_db_entry_t *this, *prev; + + if (PREDICT_FALSE(up->flags & CNAT_USER_DB_NAT64_FLAG) != 0) { + /* Preventive check - Not a NAT44 entry */ + return; + } + +#if 1 + if(PREDICT_FALSE(up->flags & CNAT_USER_DB_DSLITE_FLAG)) { + dslite_key_t dk = { + {up->ipv6[0], up->ipv6[1], up->ipv6[2], up->ipv6[3]} , + {{up->key.k.ipv4, up->key.k.port, up->key.k.vrf}} + }; + + DSLITE_V6_GET_HASH((&dk), + bucket, + CNAT_USER_HASH_MASK); + DSLITE_PRINTF(1, "Delete1 DSL user hash bucket ..%u\n", bucket); + } else { + CNAT_V4_GET_HASH(up->key.key64, + bucket, CNAT_USER_HASH_MASK) + DSLITE_PRINTF(1, "Delete1 NAT44 user hash bucket ..%u\n", bucket); + } +#else + CNAT_V4_GET_HASH(up->key.key64, + bucket, CNAT_USER_HASH_MASK) + DSLITE_PRINTF(1, "Delete2 NAT44 user hash bucket ..%u\n", bucket); +#endif + + index = cnat_user_hash[bucket].next; + + ASSERT(index != EMPTY); + + prev = 0; + do { + this = cnat_user_db + index; + if (PREDICT_TRUE(this == up)) { + if (prev == 0) { 
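+                /* "up" is at the head of this user-hash bucket chain:
+                 * unlink it by pointing the bucket head at the next entry.
+                 */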
+ cnat_user_hash[bucket].next = up->user_hash.next; + goto found; + } else { + prev->user_hash.next = up->user_hash.next; + goto found; + } + } + prev = this; + index = this->user_hash.next; + } while (index != EMPTY); + + ASSERT(0); + + found: + pool_put(cnat_user_db, up); +} + +cnat_user_db_entry_t* +cnat_user_db_lookup_entry(cnat_db_key_bucket_t *uki) +{ + u64 a, b, c; + u32 index; + cnat_user_db_entry_t *udb=NULL; + + CNAT_V4_GET_HASH(uki->k.key64, + uki->bucket, + CNAT_USER_HASH_MASK) + + /* now: index in user vector */ + index = cnat_user_hash[uki->bucket].next; + if (PREDICT_TRUE(index != EMPTY)) { + do { + udb = cnat_user_db + index; + if (PREDICT_FALSE(udb->key.key64 == uki->k.key64)) { + return udb; + } + index = udb->user_hash.next; + } while (index != EMPTY); + } + return (NULL); +} + +cnat_user_db_entry_t* +cnat_user_db_create_entry(cnat_db_key_bucket_t *uki, + u32 portmap_index) +{ + cnat_user_db_entry_t *udb = NULL; + + pool_get(cnat_user_db, udb); + memset(udb, 0, sizeof(*udb)); + + udb->ntranslations = 1; + udb->portmap_index = portmap_index; + udb->key.key64 = uki->k.key64; + /* Add this user to the head of the bucket chain */ + udb->user_hash.next = + cnat_user_hash[uki->bucket].next; + cnat_user_hash[uki->bucket].next = udb - cnat_user_db; + +#ifndef NO_BULK_LOGGING + INIT_BULK_CACHE(udb) +#endif /* NO_BULK_LOGGING */ + return udb; +} + +cnat_main_db_entry_t* +cnat_create_main_db_entry_and_hash(cnat_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + cnat_user_db_entry_t *udb) +{ + u64 a, b, c; + u32 db_index; + cnat_main_db_entry_t *db = NULL; + + pool_get(cnat_main_db, db); + memset(db, 0, sizeof(*db)); + + db_index = db - cnat_main_db; + db->in2out_key.k.ipv4 = ki->k.k.ipv4; + db->in2out_key.k.port = ki->k.k.port; + db->in2out_key.k.vrf = ki->k.k.vrf; + db->out2in_key.k.ipv4 = ko->k.k.ipv4; + db->out2in_key.k.port = ko->k.k.port; + db->out2in_key.k.vrf = ko->k.k.vrf; + + db->user_ports.next = db_index; + db->user_ports.prev = db_index; + db->user_index = udb - cnat_user_db; + //db->portmap_index = udb->portmap_index; + db->flags &= ~(CNAT_DB_DSLITE_FLAG); // Mark that it is not dslite + if (PREDICT_FALSE(udb->ntranslations == 1)) { + /* + * first port for this src vrf/src ip addr + */ + udb->translation_list_head_index = db_index; + } else { + index_dlist_addtail(udb->translation_list_head_index, + (u8 *)cnat_main_db, sizeof(cnat_main_db[0]), + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports), + db_index); + } + + /* + * setup o2i hash key + */ + CNAT_V4_GET_HASH(ko->k.key64, + ko->bucket, + CNAT_MAIN_HASH_MASK) + db->out2in_hash.next = cnat_out2in_hash[ko->bucket].next; + cnat_out2in_hash[ko->bucket].next = db_index; + /* + * setup i2o hash key, bucket is already calculate + */ + db->in2out_hash.next = cnat_in2out_hash[ki->bucket].next; + cnat_in2out_hash[ki->bucket].next = db_index; + +#if DEBUG > 1 + printf("\nMy_Instance_Number %d: Bucket %d, Db_Index %d", + my_instance_number, ki->bucket, db_index); + printf("\nInside (VRF 0x%x, IP 0x%x, PORT 0x%x)", + db->in2out_key.k.vrf, db->in2out_key.k.ipv4, db->in2out_key.k.port); + printf("\nOutside (VRF 0x%x, IP 0x%x, PORT 0x%x)", + db->out2in_key.k.vrf, db->out2in_key.k.ipv4, db->out2in_key.k.port); + printf("\nUser Index %d, IP 0x%x", + db->user_index, udb->key.k.ipv4); +#endif + + NAT44_COMMON_STATS.active_translations++; + + return db; +} + +static inline void pptp_clear_all_channels( + cnat_main_db_entry_t *db) +{ + u32 db_index, current_db_index; + cnat_main_db_entry_t *temp_db; + + /* clear all channels */ + + 
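+    /* Walk the list of GRE channels hanging off this PPTP tunnel entry,
+     * expire each child immediately and decouple it from the parent;
+     * the tunnel's own channel list is emptied at the end.
+     */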
db_index = db->proto_data.pptp_list.next; + current_db_index = db - cnat_main_db; + + while( db_index != EMPTY) { + temp_db = cnat_main_db + db_index; + db_index = temp_db->proto_data.pptp_list.next; + temp_db->entry_expires = 0; + if(PREDICT_FALSE(temp_db->proto_data.pptp_list.prev + == current_db_index)) { // Decouple child GREs from parent + temp_db->proto_data.pptp_list.prev = EMPTY; + } + } + + db->proto_data.pptp_list.next = EMPTY; +} + +void pptp_remove_channel_from_tunnel(cnat_main_db_entry_t *db) { + + cnat_main_db_entry_t *prev_db, *next_db; + + prev_db = cnat_main_db + db->proto_data.pptp_list.prev; + next_db = cnat_main_db + db->proto_data.pptp_list.next; + + /* remove entry from the tunnel list */ + if(PREDICT_TRUE(db->proto_data.pptp_list.prev != EMPTY)) { + prev_db->proto_data.pptp_list.next = + db->proto_data.pptp_list.next ; + } + + if(db->proto_data.pptp_list.next != EMPTY) { + next_db->proto_data.pptp_list.prev + = db->proto_data.pptp_list.prev; + } + +} + +void cnat_delete_main_db_entry_v2 (cnat_main_db_entry_t *ep) +{ + u32 main_db_index; + u32 vrfmap_len, udb_len; + cnat_user_db_entry_t *up =0; + cnat_portmap_v2_t *pm =0; + cnat_portmap_v2_t *my_pm =0; + cnat_vrfmap_t *my_vrfmap =0; + u16 static_port_range; +#ifndef NO_BULK_LOGGING + bulk_alloc_size_t bulk_size; + int nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; +#endif + pool_header_t *h = pool_header(cnat_user_db); + u16 instance = 0; + u32 my_index; + + + if (PREDICT_FALSE(ep->flags & CNAT_DB_NAT64_FLAG) != 0) { + /* Preventive check - Not a NAT44 entry */ + return; + } + + if(PREDICT_FALSE(ep->flags & + CNAT_DB_FLAG_PPTP_TUNNEL_ACTIVE)) { + pptp_clear_all_channels(ep); + PPTP_DECR(active_tunnels); + } + + if(PREDICT_FALSE(ep->flags & + CNAT_DB_FLAG_PPTP_GRE_ENTRY)) { + pptp_remove_channel_from_tunnel(ep); + PPTP_DECR(active_channels); + } + + /* This function gets called from various locations.. + * many times from config handler.. 
so we + * to ensure that multiple sessions if any are + * released + */ + + if(PREDICT_FALSE(ep->nsessions > 1)) { + cnat_session_entry_t *sdb; + while(ep->nsessions > 1 && + ep->session_head_index != EMPTY) { + sdb = cnat_session_db + ep->session_head_index; + cnat_delete_session_db_entry(sdb, TRUE); + } + } + + /* Find the set of portmaps for the outside vrf */ + vrfmap_len = vec_len(cnat_map_by_vrf); + udb_len = vec_len(cnat_user_db); + + /* In case of invalid user just return, deleting only main db + * is not a good idea, since some valid user db entry might be pointing + * to that main db and hence leave the dbs in a inconsistent state + */ + if (PREDICT_FALSE((ep->user_index >= udb_len) || + (clib_bitmap_get(h->free_bitmap, ep->user_index)))) { +#ifdef DEBUG_PRINTF_ENABLED + printf("invalid/unused user index in db %d\n", ep->user_index); +#endif + spp_printf(CNAT_INV_UNUSED_USR_INDEX, 1, (u32 *) &(ep->user_index)); + cnat_main_db_entry_dump(ep); + return; + } + + up = cnat_user_db + ep->user_index; + +/* Point to the right portmap list */ +if (PREDICT_FALSE(ep->flags & CNAT_DB_DSLITE_FLAG)) { + instance = ep->dslite_nat44_inst_id; + pm = dslite_table_db_ptr[instance].portmap_list; + if(PREDICT_FALSE((pm == NULL))) { + DSLITE_PRINTF(3, "NULL portmap list for dslite_id %u, state %u\n", + instance, dslite_table_db_ptr[instance].state); + cnat_main_db_entry_dump(ep); + goto delete_entry; + } + static_port_range = + STAT_PORT_RANGE_FROM_INST_PTR(&(dslite_table_db_ptr[instance])); + /* + * Netflow logging API for delete event + */ + bulk_size = + BULKSIZE_FROM_VRFMAP(&(dslite_table_db_ptr[instance])); +} else { + if (PREDICT_FALSE(ep->vrfmap_index >= vrfmap_len)) { +#ifdef DEBUG_PRINTF_ENABLED + printf("invalid vrfmap index in db\n"); +#endif + spp_printf(CNAT_INVALID_VRFMAP_INDEX, 0, NULL); + cnat_main_db_entry_dump(ep); + goto delete_entry; + } + instance = NAT44_RESERVED_INST_ID; + my_vrfmap = cnat_map_by_vrf + ep->vrfmap_index; + pm = my_vrfmap->portmap_list; + static_port_range = cnat_static_port_range; + bulk_size = BULKSIZE_FROM_VRFMAP(my_vrfmap); +} + + if (PREDICT_FALSE(ep->flags & CNAT_DB_FLAG_PORT_PAIR)) { + /* Give back the port(s) */ + cnat_port_free_v2_bulk(pm, up->portmap_index, + PORT_PAIR, ep->out2in_key.k.port, up, static_port_range +#ifndef NO_BULK_LOGGING + , bulk_size, &nfv9_log_req +#endif + ); + } else { + /* Give back the port(s) */ + cnat_port_free_v2_bulk (pm, up->portmap_index, + PORT_SINGLE, ep->out2in_key.k.port, up, static_port_range +#ifndef NO_BULK_LOGGING + , bulk_size, &nfv9_log_req +#endif + ); + } + + if (PREDICT_TRUE(!(ep->flags & CNAT_DB_DSLITE_FLAG))) { + if(PREDICT_FALSE(nfv9_log_req != CACHE_ALLOC_NO_LOG_REQUIRED)) { + if(PREDICT_FALSE(my_vrfmap->nf_logging_policy == SESSION_LOG_ENABLE)) { + if(ep->nsessions != 0) { + cnat_nfv9_nat44_log_session_delete(ep, NULL, my_vrfmap); + } + } else { + cnat_nfv9_log_mapping_delete(ep, my_vrfmap +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } + if(PREDICT_TRUE((my_vrfmap->syslog_logging_policy != SESSION_LOG_ENABLE) || + (ep->nsessions != 0))) { + cnat_syslog_nat44_mapping_delete(ep, my_vrfmap, NULL +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } + } + } else { + if(PREDICT_FALSE(nfv9_log_req != CACHE_ALLOC_NO_LOG_REQUIRED)) { + if(PREDICT_FALSE( dslite_table_db_ptr[instance].nf_logging_policy == + SESSION_LOG_ENABLE)) { + cnat_nfv9_ds_lite_log_session_delete(ep, + (dslite_table_db_ptr + instance),NULL); + } else { + cnat_nfv9_ds_lite_mapping_delete(ep, + (dslite_table_db_ptr + instance) 
+#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } +#ifdef TOBE_PORTED + cnat_syslog_ds_lite_mapping_delete(ep, + (dslite_table_db_ptr + instance), NULL +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); +#endif /* TOBE_PORTED */ + } + } + +delete_entry: + + main_db_index = ep - cnat_main_db; + + up->ntranslations--; + + /* + * when user reaches max allowed port limit + * we generate icmp msg and inc the counter + * when counter reach the icmp msg rate limit + * we stop icmp msg gen + * when a user port is freed + * that means we need to clear the msg gen counter + * so that next time + * reach max port limit, we can generate new icmp msg again + */ + up->icmp_msg_count = 0; + + up->translation_list_head_index = index_dlist_remelem ( + up->translation_list_head_index, (u8 *)cnat_main_db, + sizeof (cnat_main_db[0]), + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports), + main_db_index); + + cnat_db_in2out_hash_delete(ep, up); + + if (PREDICT_FALSE(up->ntranslations == 0)) { + ASSERT(up->translation_list_head_index == EMPTY); + nat44_dslite_common_stats[instance].num_subscribers--; + my_index = up->portmap_index; + my_pm = pm + my_index; + if(PREDICT_TRUE(my_pm->private_ip_users_count)) { + my_pm->private_ip_users_count--; +#ifdef DEBUG_PRINTF_IP_N_TO_1_ENABLED + PLATFORM_DEBUG_PRINT("\n cnat_delete_main_db_entry_v2 " + "private_ip_users_count = %d", + my_pm->private_ip_users_count); +#endif + + } + cnat_user_db_delete(up); + + } + + /* Remove from main DB hashes */ + //cnat_db_in2out_hash_delete(ep); + cnat_db_out2in_hash_delete(ep); + + pool_put(cnat_main_db, ep); + + if(PREDICT_FALSE(ep->flags & CNAT_DB_FLAG_STATIC_PORT)) { + nat44_dslite_common_stats[instance].num_static_translations--; + } else { + nat44_dslite_common_stats[instance].num_dynamic_translations--; + } + nat44_dslite_common_stats[instance].active_translations--; + nat44_dslite_global_stats[!!(instance - 1)].translation_delete_count ++; +} + +cnat_main_db_entry_t* +cnat_main_db_lookup_entry_out2in (cnat_db_key_bucket_t *ko) +{ + u64 a, b, c; + u32 index; + cnat_main_db_entry_t *db; + + CNAT_V4_GET_HASH(ko->k.key64, + ko->bucket, + CNAT_MAIN_HASH_MASK); + + index = cnat_out2in_hash[ko->bucket].next; + if (PREDICT_TRUE(index == EMPTY)) { + return (NULL); + } + + do { + db = cnat_main_db + index; + if (PREDICT_TRUE(db->out2in_key.key64 == ko->k.key64)) { + return db; + } + index = db->out2in_hash.next; + } while (index != EMPTY); + + return (NULL); +} + +/* Creates 2 sessions. 
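+ * Called when a main db entry that so far tracked a single (default)
+ * destination sees traffic towards a second destination.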
+ * Moves the default dest info from mdb to first session + * Fills the dest_info details in to second session and + * returns the pointer to second session + */ +cnat_session_entry_t *cnat_handle_1to2_session( + cnat_main_db_entry_t *mdb, + cnat_key_t *dest_info) +{ + cnat_key_t old_dest_info; + pool_header_t *h; + u32 free_session = 0; + u16 instance; + cnat_session_entry_t *session_db1 = NULL, *session_db2 = NULL; + + h = pool_header(cnat_session_db); + free_session = vec_len(h->free_indices) - 1; + + if (PREDICT_FALSE(free_session < 2)) { + if (mdb->flags & CNAT_DB_DSLITE_FLAG) { + instance = mdb->dslite_nat44_inst_id; + } else { + instance = NAT44_RESERVED_INST_ID; + } + + /* we need 2 sessions here, return NULL */ + nat44_dslite_common_stats[instance].drops_sessiondb_limit_exceeded++; + return NULL; + } + + old_dest_info.k.ipv4 = mdb->dst_ipv4; + old_dest_info.k.port = mdb->dst_port; + old_dest_info.k.vrf = mdb->in2out_key.k.vrf; + + /* create 2 new sessions */ + session_db1 = cnat_create_session_db_entry(&old_dest_info, + mdb, FALSE); + + if(PREDICT_FALSE(session_db1 == NULL)) { + return NULL; + } + + /* update pkt info to session 2 */ + session_db2 = cnat_create_session_db_entry(dest_info, + mdb, TRUE); + + if(PREDICT_FALSE(session_db2 == NULL)) { + cnat_delete_session_db_entry(session_db1, FALSE); + return NULL; + } + /* update main db info to session 1 */ + cnat_dest_update_main2session(mdb, session_db1); + + return session_db2; +} + +/* The below function shold be called only + * when a NAT44 STATIC entry received traffic + * for the first time. This is to ensure + * the destination is noted and logged + */ +void cnat_add_dest_n_log( + cnat_main_db_entry_t *mdb, + cnat_key_t *dest_info) +{ + + if(PREDICT_FALSE(mdb->nsessions != 0)) { + return; /* Should not have been called */ + } + + mdb->dst_ipv4 = dest_info->k.ipv4; + mdb->dst_port = dest_info->k.port; + mdb->nsessions = 1; + mdb->entry_expires = cnat_current_time; + u16 instance; + + if (mdb->flags & CNAT_DB_DSLITE_FLAG) { + instance = mdb->dslite_nat44_inst_id; + cnat_session_log_ds_lite_mapping_create(mdb, + (dslite_table_db_ptr + instance),NULL); + } else { + instance = NAT44_RESERVED_INST_ID; + cnat_vrfmap_t *my_vrfmap = cnat_map_by_vrf + mdb->vrfmap_index; + cnat_session_log_nat44_mapping_create(mdb, 0, my_vrfmap); + } +} + +/* + * this function is called by exception node + * when lookup is fialed in i2o node + * + * if reash per user port limit, + * set user_db_entry pointer, and error == CNAT_OUT_LIMIT + */ +cnat_main_db_entry_t* +cnat_get_main_db_entry_v2(cnat_db_key_bucket_t *ki, + port_pair_t port_pair_type, + port_type_t port_type, + cnat_gen_icmp_info *info, + cnat_key_t *dest_info) +{ + u16 protocol; + cnat_errno_t rv; + cnat_db_key_bucket_t u_ki, ko; + u32 my_index, free_main, free_user; + u32 current_timestamp; + u16 my_vrfmap_index; + u16 my_vrfmap_entry_found = 0; + cnat_vrfmap_t *my_vrfmap =0; + cnat_portmap_v2_t *pm =0; + cnat_user_db_entry_t *udb = 0; + cnat_main_db_entry_t *db = 0; + pool_header_t *h; + u16 port_limit; + cnat_portmap_v2_t *my_pm = 0; + +#ifndef NO_BULK_LOGGING + int nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; +#endif + + + /* + * need to try lookup again because + * second pkt may come here before the entry is created + * by receiving first pkt due to high line rate. + */ + info->gen_icmp_msg = CNAT_NO_ICMP_MSG; + info->error = CNAT_SUCCESS; + db = cnat_main_db_lookup_entry(ki); + if (PREDICT_TRUE(db)) { + /* what if the source is talking to a + * new dest now? 
We will have to handle this case and + * take care of - creating session db and logging + */ + if(PREDICT_FALSE((!dest_info->k.ipv4) && (!dest_info->k.port))) { + return db; /* if dest_info is null don't create session */ + } + if(PREDICT_TRUE((db->dst_ipv4 == dest_info->k.ipv4) && + (db->dst_port == dest_info->k.port))) { + return db; + } + dest_info->k.vrf = db->in2out_key.k.vrf; + /* Src is indeed talking to a different dest */ + cnat_session_entry_t *session_db2 = NULL; + if(PREDICT_TRUE(db->nsessions == 1)) { + session_db2 = cnat_handle_1to2_session(db, dest_info); + if(PREDICT_TRUE(session_db2 != NULL)) { + CNAT_DB_TIMEOUT_RST(session_db2); + return db; + } else { + info->error = CNAT_ERR_NO_SESSION_DB; + return NULL; + } + } else if(PREDICT_FALSE(db->nsessions == 0)) { + /* Should be static entry.. should never happen + */ + if(PREDICT_TRUE(dest_info->k.ipv4 != 0)) { + cnat_add_dest_n_log(db, dest_info); + } + return db; + } else { + /* The src has already created multiple sessions.. very rare + */ + session_db2 = cnat_create_session_db_entry(dest_info, + db, TRUE); + if(PREDICT_TRUE(session_db2 != NULL)) { + CNAT_DB_TIMEOUT_RST(session_db2); + return db; + } else { + info->error = CNAT_ERR_NO_SESSION_DB; + return NULL; + } + } + + } + + /* + * step 1. check if outside vrf is configured or not + * and Find the set of portmaps for the outside vrf + * insider vrf is one to one mappted to outside vrf + * key is vrf and ip only + * ki.k.k.vrf has protocol bits, mask out + */ + protocol = ki->k.k.vrf & CNAT_PRO_MASK; + u_ki.k.k.vrf = ki->k.k.vrf & CNAT_VRF_MASK; + u_ki.k.k.ipv4 = ki->k.k.ipv4; + u_ki.k.k.port = 0; + + my_vrfmap_index = vrf_map_array[u_ki.k.k.vrf]; + my_vrfmap = cnat_map_by_vrf + my_vrfmap_index; + + my_vrfmap_entry_found = ((my_vrfmap_index != VRF_MAP_ENTRY_EMPTY) && + (my_vrfmap->status == S_RUN) && + (my_vrfmap->i_vrf == u_ki.k.k.vrf)); + + if (PREDICT_FALSE(!my_vrfmap_entry_found)) { + u32 arr[] = {ki->k.k.vrf, ki->k.k.ipv4, ki->k.k.port}; + if ((my_vrfmap_index == VRF_MAP_ENTRY_EMPTY) || + (my_vrfmap->i_vrf == u_ki.k.k.vrf)) { + info->error = CNAT_NO_CONFIG; + CNAT_DEBUG_INSIDE_ERR(CNAT_NO_CONFIG) + spp_printf(CNAT_NO_CONFIG_ERROR, 3, arr); + } else { + info->error = CNAT_NO_VRF_RUN; + CNAT_DEBUG_INSIDE_ERR(CNAT_NO_VRF_RUN) + spp_printf(CNAT_NO_VRF_RUN_ERROR, 3, arr); + } + + return (NULL); + } + + pm = my_vrfmap->portmap_list; + + port_limit = my_vrfmap->port_limit; + if(PREDICT_FALSE(!port_limit)) { + port_limit = cnat_main_db_max_ports_per_user; + } + /* + * set o2i key with protocl bits + */ + ko.k.k.vrf = my_vrfmap->o_vrf | protocol; + + /* + * step 2. check if src vrf, src ip addr is alreay + * in the user db + * if yes, use PORT_ALLOC_DIRECTED + * if no, use PORT_ALLOC_ANY since it is first time + */ + udb = cnat_user_db_lookup_entry(&u_ki); + if (PREDICT_TRUE(udb)) { + /* + * not first time allocate port for this user + * check limit + */ + if (PREDICT_FALSE(udb->ntranslations >= + port_limit)) { + /* Check for the port type here. If we are getting + * a STATIC PORT, allow the config. 
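+              * Static mappings are operator provisioned, so a static
+              * request is honoured even when the subscriber has already
+              * used up the dynamic per-user port limit.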
+ */ + if (PREDICT_TRUE(port_type != PORT_TYPE_STATIC)) { + info->error = CNAT_OUT_LIMIT; + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(CNAT_OUT_LIMIT) + port_exceeded_msg_log(u_ki.k.k.ipv4, u_ki.k.k.vrf); + in2out_drops_port_limit_exceeded ++; + u_ki.k.k.port = ki->k.k.port; + u_ki.k.k.vrf = ki->k.k.vrf; + handle_cnat_port_exceeded_logging(udb, &u_ki.k, my_vrfmap); + return (NULL); + } + } + CHECK_CLEAR_PORT_LIMIT_EXCEED_FLAG(udb, + port_limit) + + /* + * check if main db has space to accomodate new entry + */ + h = pool_header(cnat_main_db); + + free_main = vec_len(h->free_indices) - 1; + if (PREDICT_FALSE(!free_main)) { + info->error = CNAT_MAIN_DB_LIMIT; + CNAT_SET_ICMP_MSG_INFO + in2out_drops_system_limit_reached ++; + CNAT_DEBUG_INSIDE_ERR(CNAT_MAIN_DB_LIMIT) + + current_timestamp = spp_trace_log_get_unix_time_in_seconds(); + if (PREDICT_FALSE((current_timestamp - last_log_timestamp) > + 1800)) { + spp_printf(CNAT_SESSION_THRESH_EXCEEDED, 0, NULL); + last_log_timestamp = current_timestamp; + } + +#ifdef UT_TEST_CODE + printf("Limit reached : OLD USER"); +#endif + return NULL; + } + + /* + * allocate port, from existing mapping + */ + my_index = udb->portmap_index; + + if (PREDICT_FALSE(port_type == PORT_TYPE_STATIC)) { + rv = cnat_static_port_alloc_v2_bulk(pm, + PORT_ALLOC_DIRECTED, + port_pair_type, + ki->k.k.ipv4, + ki->k.k.port, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , + udb, BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , my_vrfmap->ip_n_to_1 + ); + + } else if (PREDICT_TRUE(port_type != PORT_TYPE_RTSP) ) { + + rv = cnat_dynamic_port_alloc_v2_bulk(pm, + PORT_ALLOC_DIRECTED, + port_pair_type, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , + udb, BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , my_vrfmap->ip_n_to_1, + &(my_vrfmap->rseed_ip) + ); + + } else { + /* + * For RTSP, two translation entries are created, + * check if main db has space to accomodate two new entry + */ + free_main = free_main - 1; + if (PREDICT_FALSE(!free_main)) { + info->error = CNAT_MAIN_DB_LIMIT; + CNAT_SET_ICMP_MSG_INFO + in2out_drops_system_limit_reached ++; + CNAT_DEBUG_INSIDE_ERR(CNAT_MAIN_DB_LIMIT) + + return NULL; + } else { + rv = cnat_dynamic_port_alloc_rtsp_bulk(pm, + PORT_ALLOC_DIRECTED, + port_pair_type, + ki->k.k.port, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , + udb, BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , &(my_vrfmap->rseed_ip) + ); + } + } + + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + info->error = rv; + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(rv) + in2out_drops_resource_depletion++; + log_port_alloc_error(rv, &(ki->k)); + return (NULL); + } + /* + * increment port in use for this user + */ + udb->ntranslations += 1; + + } else { + /* + * first time allocate port for this user + */ + + /* + * Do not create entry if port limit is invalid + */ + + if (PREDICT_FALSE(!port_limit)) { + if (PREDICT_TRUE(port_type != PORT_TYPE_STATIC)) { + info->error = CNAT_OUT_LIMIT; + in2out_drops_port_limit_exceeded ++; + port_exceeded_msg_log(u_ki.k.k.ipv4, u_ki.k.k.vrf); + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(CNAT_OUT_LIMIT) + return (NULL); + } + } + + /* + * Check if main db has space for new entry + * Allowing a user db entry to be created if main db is not free + * will cause a port to be allocated to that user, which results in + * wastage of that port, hence 
the check is done here. + */ + h = pool_header(cnat_main_db); + free_main = vec_len(h->free_indices) - 1; + h = pool_header(cnat_user_db); + free_user = vec_len(h->free_indices) - 1; + + /* + * If either main_db or user_db does not have entries + * bail out, with appropriate error + */ + if (PREDICT_FALSE(!(free_main && free_user))) { + u32 log_error; + if(free_main) { + info->error = CNAT_USER_DB_LIMIT; + log_error = CNAT_USER_DB_LIMIT_ERROR; + } else { + info->error = CNAT_MAIN_DB_LIMIT; + log_error = CNAT_MAIN_DB_LIMIT_ERROR; + } + in2out_drops_system_limit_reached ++; + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(info->error) + spp_printf(log_error, 0, 0); + return NULL; + } + + if (PREDICT_FALSE(port_type == PORT_TYPE_STATIC)) { + rv = cnat_static_port_alloc_v2_bulk(pm, + PORT_ALLOC_ANY, + port_pair_type, + ki->k.k.ipv4, + ki->k.k.port, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , + udb, BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , my_vrfmap->ip_n_to_1 + ); + + } else if (PREDICT_TRUE(port_type != PORT_TYPE_RTSP)) { + rv = cnat_dynamic_port_alloc_v2_bulk(pm, + PORT_ALLOC_ANY, + port_pair_type, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , NULL, BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , my_vrfmap->ip_n_to_1, + &(my_vrfmap->rseed_ip) + ); + } else { + /* + * For RTSP, two translation entries are created, + * check if main db has space to accomodate two new entry + */ + free_main = free_main - 1; + if (PREDICT_FALSE(!free_main)) { + info->error = CNAT_MAIN_DB_LIMIT; + CNAT_SET_ICMP_MSG_INFO + in2out_drops_system_limit_reached ++; + CNAT_DEBUG_INSIDE_ERR(CNAT_MAIN_DB_LIMIT) + + return NULL; + } else { + + rv = cnat_dynamic_port_alloc_rtsp_bulk(pm, + PORT_ALLOC_ANY, + port_pair_type, + ki->k.k.port, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , NULL, BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , &(my_vrfmap->rseed_ip) + ); + /* TODO: Add the port pair flag here */ + } + } + + + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + info->error = rv; + in2out_drops_resource_depletion ++; + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(rv) + log_port_alloc_error(rv, &(ki->k)); + return (NULL); + } + /* + * create entry in user db + */ + udb = cnat_user_db_create_entry(&u_ki, my_index); + NAT44_COMMON_STATS.num_subscribers++; + my_pm = pm + my_index; + if(PREDICT_TRUE(my_pm->private_ip_users_count < PORTS_PER_ADDR)) { + my_pm->private_ip_users_count++; +#ifdef DEBUG_PRINTF_IP_N_TO_1_ENABLED + PLATFORM_DEBUG_PRINT("\n cnat_get_main_db_entry_v2 " + "dynamic alloc private_ip_users_count = %d", + my_pm->private_ip_users_count); +#endif + } else { + PLATFORM_DEBUG_PRINT("\n ERROR: private_ip_users_count has " + "reached MAX PORTS_PER_ADDR"); + } +#ifndef NO_BULK_LOGGING + if(PREDICT_TRUE(udb && (BULK_ALLOC_NOT_ATTEMPTED != nfv9_log_req))) { + cnat_update_bulk_range_cache(udb, ko.k.k.port, + BULKSIZE_FROM_VRFMAP(my_vrfmap)); + } +#endif /* #ifndef NO_BULK_LOGGING */ + + } + + /* + * step 3: + * outside port is allocated for this src vrf/src ip addr + * 1)create a new entry in main db + * 2)setup cnat_out2in_hash key + * 3)setup cnat_in2out_hash key + */ + db = cnat_create_main_db_entry_and_hash(ki, &ko, udb); + + translation_create_count ++; +#ifdef DSLITE_DEF + db->dslite_nat44_inst_id = NAT44_RESERVED_INST_ID; +#endif + db->vrfmap_index = my_vrfmap - cnat_map_by_vrf; + + /* + * don't 
forget logging + * logging API is unconditional, + * logging configuration check is done inside the inline function + */ + + db->dst_ipv4 = dest_info->k.ipv4; + db->dst_port = dest_info->k.port; + if(PREDICT_TRUE(db->dst_ipv4 || db->dst_port)) { + db->nsessions++; + } + + if(PREDICT_FALSE(nfv9_log_req != CACHE_ALLOC_NO_LOG_REQUIRED)) { + if(PREDICT_FALSE(my_vrfmap->nf_logging_policy == SESSION_LOG_ENABLE)) { + /* do not log for static entries.. we will log when traffic flows */ + if(PREDICT_TRUE(db->dst_ipv4 || db->dst_port)) { + cnat_nfv9_nat44_log_session_create(db, 0, my_vrfmap); + } + } else { + cnat_nfv9_log_mapping_create(db, my_vrfmap +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } + if(PREDICT_TRUE((my_vrfmap->syslog_logging_policy != SESSION_LOG_ENABLE) || + (db->dst_ipv4 || db->dst_port))) { + cnat_syslog_nat44_mapping_create(db, my_vrfmap, 0 +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } + } + if (PREDICT_FALSE(port_pair_type == PORT_PAIR)) { + cnat_main_db_entry_t *db2 = 0; + cnat_db_key_bucket_t new_ki = *ki; + u64 a, b, c; + + new_ki.k.k.port += 1; + ko.k.k.port += 1; + + CNAT_V4_GET_HASH(new_ki.k.key64, new_ki.bucket, + CNAT_MAIN_HASH_MASK); + + db2 = cnat_create_main_db_entry_and_hash(&new_ki, &ko, udb); + + translation_create_count ++; +#ifdef DSLITE_DEF + db2->dslite_nat44_inst_id = NAT44_RESERVED_INST_ID; +#endif + db2->vrfmap_index = my_vrfmap - cnat_map_by_vrf; + db2->entry_expires = cnat_current_time; + db2->flags |= CNAT_DB_FLAG_ALG_ENTRY; + udb->ntranslations += 1; + db2->dst_ipv4 = dest_info->k.ipv4; + db2->dst_port = dest_info->k.port; + db2->nsessions = 0; /* For ALG db, set sessions to 0 - CSCuf78420 */ + + if(PREDICT_FALSE(nfv9_log_req != CACHE_ALLOC_NO_LOG_REQUIRED)) { + if(PREDICT_FALSE(my_vrfmap->nf_logging_policy == SESSION_LOG_ENABLE)) { + /* do not log for static entries.. we will log when traffic flows */ + if(PREDICT_TRUE(db2->dst_ipv4 || db2->dst_port)) { + cnat_nfv9_nat44_log_session_create(db2, 0, my_vrfmap); + } + } else { + cnat_nfv9_log_mapping_create(db2, my_vrfmap +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } + if(PREDICT_TRUE((my_vrfmap->syslog_logging_policy != SESSION_LOG_ENABLE) || + (db2->dst_ipv4 || db2->dst_port))) { + cnat_syslog_nat44_mapping_create(db2, my_vrfmap, 0 +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } + } + } + + return db; +} + +/* + * this function is called from config handler only + * to allocate a static port based db entry + * + * the actual mapped address and port are already specified + */ +cnat_main_db_entry_t* +cnat_create_static_main_db_entry_v2 (cnat_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + cnat_vrfmap_t *my_vrfmap, + cnat_gen_icmp_info *info) +{ + u16 protocol; + u32 head; + cnat_errno_t rv; + cnat_db_key_bucket_t u_ki; + u32 my_index, free_main, free_user; + cnat_portmap_v2_t *pm =0; + cnat_portmap_v2_t *my_pm =0; + cnat_user_db_entry_t *udb = 0; + cnat_main_db_entry_t *db = 0; + pool_header_t *h; +#ifndef NO_BULK_LOGGING + int nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; +#endif + + /* + * need to try lookup again because + * second pkt may come here before the entry is created + * by receiving first pkt due to high line rate. + */ + info->gen_icmp_msg = CNAT_NO_ICMP_MSG; + info->error = CNAT_SUCCESS; + db = cnat_main_db_lookup_entry(ki); + + /* + * If we already have an entry with this inside address, port + * check delete the entry and proceed further. This should + * If yes, something is terribly wrong. 
Bail out + */ + if (PREDICT_FALSE(db)) { + + if (db->flags & CNAT_DB_FLAG_STATIC_PORT) { + + if ((db->out2in_key.k.ipv4 == ko->k.k.ipv4) && + (db->out2in_key.k.port == ko->k.k.port) && + (db->out2in_key.k.vrf == ko->k.k.vrf)) { + +#ifdef DEBUG_PRINTF_ENABLED + printf("Same Static Port Exists ki 0x%16llx ko 0x%16llx", + ki->k, ko->k); +#endif + /* + * We have already programmed this, return + */ + return (db); + } + + /* + * We already have a static port with different mapping + * Return an error for this case. + */ + info->error = CNAT_ERR_PARSER; + +#ifdef DEBUG_PRINTF_ENABLED + printf("Static Port Existing and Diff ki 0x%16llx ko 0x%16llx", + ki, db->out2in_key); +#endif + { + u32 arr[] = {STAT_PORT_CONFIG_IN_USE, (ki->k.k.vrf & CNAT_VRF_MASK), + ki->k.k.ipv4, ki->k.k.port, (ki->k.k.vrf & CNAT_PRO_MASK) }; + spp_printf(CNAT_CONFIG_ERROR, 5, arr); + } + return (db); + } + +#ifdef DEBUG_PRINTF_ENABLED + printf("Deleting Dynamic entry ki 0x%16llx ko 0x%16llx", + ki, db->out2in_key); +#endif + + /* + * If for some reason we have dynamic entries, just delete them + * and proceed. + */ + cnat_delete_main_db_entry_v2(db); + + db = NULL; + } + + protocol = ki->k.k.vrf & CNAT_PRO_MASK; + u_ki.k.k.vrf = ki->k.k.vrf & CNAT_VRF_MASK; + u_ki.k.k.ipv4 = ki->k.k.ipv4; + u_ki.k.k.port = 0; + + pm = my_vrfmap->portmap_list; + + /* + * check if src vrf, src ip addr is already + * in the user db + * if yes, use PORT_ALLOC_DIRECTED + * if no, use PORT_ALLOC_ANY since it is first time + */ + udb = cnat_user_db_lookup_entry(&u_ki); + if (PREDICT_TRUE(udb)) { + /* + * check if main db has space to accomodate new entry + */ + h = pool_header(cnat_main_db); + + free_main = vec_len(h->free_indices) - 1; + if (PREDICT_FALSE(!free_main)) { + info->error = CNAT_MAIN_DB_LIMIT; + CNAT_SET_ICMP_MSG_INFO + in2out_drops_system_limit_reached ++; + CNAT_DEBUG_INSIDE_ERR(CNAT_MAIN_DB_LIMIT) +#ifdef UT_TEST_CODE + printf("Limit reached : OLD USER"); +#endif + spp_printf(CNAT_MAIN_DB_LIMIT_ERROR, 0, 0); + return NULL; + } + + /* + * allocate port, from existing mapping + */ + my_index = udb->portmap_index; + my_pm = pm + my_index; + /* It is quite possible that we hit the scenario of CSCtj17774. 
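+     * (i.e. the user's existing portmap entry carries a public IPv4
+     * address different from the one now being configured)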
+ * Delete all the main db entries and add the ipv4 address sent by + * CGN-MA as Static port alloc any + */ + + if (PREDICT_FALSE(my_pm->ipv4_address != ko->k.k.ipv4)) { + if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) { + printf("Delete Main db entry and check for" + " ipv4 address sanity pm add = 0x%x ip add = 0x%x\n", + my_pm->ipv4_address, ko->k.k.ipv4); + } + do { + /* udb is not NULL when we begin with for sure */ + head = udb->translation_list_head_index; + db = cnat_main_db + head; + cnat_delete_main_db_entry_v2(db); + } while (!pool_is_free(cnat_user_db, udb)); + + rv = cnat_mapped_static_port_alloc_v2_bulk (pm, + PORT_ALLOC_ANY, &my_index, ko->k.k.ipv4, ko->k.k.port, + udb, BULKSIZE_FROM_VRFMAP(my_vrfmap), &nfv9_log_req, + my_vrfmap->ip_n_to_1); + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + info->error = rv; + in2out_drops_resource_depletion ++; + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(rv) + return (NULL); + } + /* + * create entry in user db + */ + udb = cnat_user_db_create_entry(&u_ki, my_index); + my_pm = pm + my_index; + if(PREDICT_TRUE(my_pm->private_ip_users_count < PORTS_PER_ADDR)) { + my_pm->private_ip_users_count++; +#ifdef DEBUG_PRINTF_IP_N_TO_1_ENABLED + PLATFORM_DEBUG_PRINT("\n cnat_create_static_main_db_entry_v2 " + "static del n alloc private_ip_users_count = " + "%d",my_pm->private_ip_users_count); +#endif + } else { + PLATFORM_DEBUG_PRINT("\n ERROR: private_ip_users_count has " + "reached MAX PORTS_PER_ADDR"); + } + NAT44_COMMON_STATS.num_subscribers++; +#ifndef NO_BULK_LOGGING + cnat_update_bulk_range_cache(udb, ko->k.k.port, + BULKSIZE_FROM_VRFMAP(my_vrfmap)); +#endif /* #ifndef NO_BULK_LOGGING */ + } else { + + rv = cnat_mapped_static_port_alloc_v2_bulk (pm, + PORT_ALLOC_DIRECTED, &my_index, ko->k.k.ipv4, ko->k.k.port, + udb, BULKSIZE_FROM_VRFMAP(my_vrfmap), &nfv9_log_req, + my_vrfmap->ip_n_to_1); + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + info->error = rv; + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(rv) + log_port_alloc_error(rv, &(ki->k)); + return (NULL); + } + + /* + * increment port in use for this user + */ + udb->ntranslations += 1; + } + } else { + if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) { + printf ("Static port alloc any\n"); + } + /* + * first time allocate port for this user + */ + + /* + * Check if main db has space for new entry + * Allowing a user db entry to be created if main db is not free + * will cause a port to be allocated to that user, which results in + * wastage of that port, hence the check is done here. 
+ */ + h = pool_header(cnat_main_db); + free_main = vec_len(h->free_indices) - 1; + h = pool_header(cnat_user_db); + free_user = vec_len(h->free_indices) - 1; + + /* + * If either main_db or user_db does not have entries + * bail out, with appropriate error + */ + if (PREDICT_FALSE(!(free_main && free_user))) { + u32 log_error; + if(free_main) { + info->error = CNAT_USER_DB_LIMIT; + log_error = CNAT_USER_DB_LIMIT_ERROR; + } else { + info->error = CNAT_MAIN_DB_LIMIT; + log_error = CNAT_MAIN_DB_LIMIT_ERROR; + } + in2out_drops_system_limit_reached ++; + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(info->error) + spp_printf(log_error, 0, 0); + return NULL; + } + + rv = cnat_mapped_static_port_alloc_v2_bulk (pm, + PORT_ALLOC_ANY, &my_index, ko->k.k.ipv4, ko->k.k.port, + udb, BULKSIZE_FROM_VRFMAP(my_vrfmap), &nfv9_log_req, + my_vrfmap->ip_n_to_1); + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + info->error = rv; + in2out_drops_resource_depletion ++; + CNAT_SET_ICMP_MSG_INFO + CNAT_DEBUG_INSIDE_ERR(rv) + log_port_alloc_error(rv, &(ki->k)); + return (NULL); + } + /* + * create entry in user db + */ + udb = cnat_user_db_create_entry(&u_ki, my_index); + my_pm = pm + my_index; + if(PREDICT_TRUE(my_pm->private_ip_users_count < PORTS_PER_ADDR)) { + my_pm->private_ip_users_count++; +#ifdef DEBUG_PRINTF_IP_N_TO_1_ENABLED + PLATFORM_DEBUG_PRINT("\n cnat_create_static_main_db_entry_v2 " + "static alloc private_ip_users_count = %d", + my_pm->private_ip_users_count); +#endif + } else { + PLATFORM_DEBUG_PRINT("\n ERROR: private_ip_users_count has " + "reached MAX PORTS_PER_ADDR"); + } + NAT44_COMMON_STATS.num_subscribers++; +#ifndef NO_BULK_LOGGING + cnat_update_bulk_range_cache(udb, ko->k.k.port, + BULKSIZE_FROM_VRFMAP(my_vrfmap)); +#endif /* #ifndef NO_BULK_LOGGING */ + } + + /* + * step 3: + * outside port is allocated for this src vrf/src ip addr + * 1)create a new entry in main db + * 2)setup cnat_out2in_hash key + * 3)setup cnat_in2out_hash key + */ + db = cnat_create_main_db_entry_and_hash(ki, ko, udb); + + translation_create_count ++; + db->vrfmap_index = my_vrfmap - cnat_map_by_vrf; + + /* + * don't forget logging + * logging API is unconditional, + * logging configuration check is done inside the inline function + */ + + if(PREDICT_FALSE(nfv9_log_req != CACHE_ALLOC_NO_LOG_REQUIRED)) { + /* if session logging is enabled .. 
do not log as there is no + * traffic yet + */ + if(PREDICT_FALSE(my_vrfmap->nf_logging_policy != SESSION_LOG_ENABLE)) { + cnat_nfv9_log_mapping_create(db, my_vrfmap +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } + if(PREDICT_FALSE(my_vrfmap->syslog_logging_policy != SESSION_LOG_ENABLE)) { + cnat_syslog_nat44_mapping_create(db, my_vrfmap, 0 +#ifndef NO_BULK_LOGGING + , nfv9_log_req +#endif + ); + } + } + + return db; +} + + +cnat_main_db_entry_t* +dslite_main_db_lookup_entry(dslite_db_key_bucket_t *ki); + +cnat_user_db_entry_t* +dslite_user_db_lookup_entry(dslite_db_key_bucket_t *uki); + +cnat_user_db_entry_t* +dslite_user_db_create_entry(dslite_db_key_bucket_t *uki, u32 portmap_index); + +cnat_main_db_entry_t* +dslite_create_main_db_entry_and_hash(dslite_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + cnat_user_db_entry_t *udb); + +#ifdef TOBE_PORTED +/* + * this function is called from config handler only + * to allocate a static port based db entry + * + * the actual mapped address and port are already specified + */ +cnat_main_db_entry_t* +dslite_create_static_main_db_entry_v2 (dslite_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + dslite_table_entry_t *dslite_entry_ptr, + cnat_gen_icmp_info *info) +{ + u16 protocol; + u32 head; + cnat_errno_t rv; + dslite_db_key_bucket_t u_ki; + u32 my_index, free_main, free_user; + cnat_portmap_v2_t *pm =0; + cnat_portmap_v2_t *my_pm =0; + cnat_user_db_entry_t *udb = 0; + cnat_main_db_entry_t *db = 0; + pool_header_t *h; + u16 dslite_id = dslite_entry_ptr->dslite_id; +#ifndef NO_BULK_LOGGING + int nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; +#endif + cnat_vrfmap_t *my_vrfmap =0; + u16 my_vrfmap_index; + + /* + * need to try lookup again because + * second pkt may come here before the entry is created + * by receiving first pkt due to high line rate. + */ + info->gen_icmp_msg = CNAT_NO_ICMP_MSG; + info->error = CNAT_SUCCESS; + db = dslite_main_db_lookup_entry(ki); + + /* + * If we already have an entry with this inside address, port + * check delete the entry and proceed further. This should + * If yes, something is terribly wrong. Bail out + */ + if (PREDICT_FALSE(db)) { + + if (db->flags & CNAT_DB_FLAG_STATIC_PORT) { + + if ((db->out2in_key.k.ipv4 == ko->k.k.ipv4) && + (db->out2in_key.k.port == ko->k.k.port) && + (db->out2in_key.k.vrf == ko->k.k.vrf)) { + +#ifdef DEBUG_PRINTF_ENABLED + printf("Same Static Port Exists ki 0x%16llx ko 0x%16llx", + ki->k, ko->k); +#endif + /* + * We have already programmed this, return + */ + return (db); + } + + /* + * We already have a static port with different mapping + * Return an error for this case. + */ + info->error = CNAT_ERR_PARSER; + +#ifdef DEBUG_PRINTF_ENABLED + printf("Static Port Existing and Diff ki 0x%16llx ko 0x%16llx", + ki, db->out2in_key); +#endif + { + u32 arr[] = {STAT_PORT_CONFIG_IN_USE, (ki->dk.ipv4_key.k.vrf & CNAT_VRF_MASK), + ki->dk.ipv4_key.k.ipv4, ki->dk.ipv4_key.k.port, (ki->dk.ipv4_key.k.vrf & CNAT_PRO_MASK) }; + spp_printf(CNAT_CONFIG_ERROR, 5, arr); + } + return (db); + } + +#ifdef DEBUG_PRINTF_ENABLED + printf("Deleting Dynamic entry ki 0x%16llx ko 0x%16llx", + ki, db->out2in_key); +#endif + + /* + * If for some reason we have dynamic entries, just delete them + * and proceed. 
+ */ + cnat_delete_main_db_entry_v2(db); + + db = NULL; + } + + + protocol = ki->dk.ipv4_key.k.vrf & CNAT_PRO_MASK; + u_ki.dk.ipv4_key.k.vrf = ki->dk.ipv4_key.k.vrf & CNAT_VRF_MASK; + u_ki.dk.ipv4_key.k.ipv4 = ki->dk.ipv4_key.k.ipv4; + u_ki.dk.ipv4_key.k.port = 0; + u_ki.dk.ipv6[0] = ki->dk.ipv6[0]; + u_ki.dk.ipv6[1] = ki->dk.ipv6[1]; + u_ki.dk.ipv6[2] = ki->dk.ipv6[2]; + u_ki.dk.ipv6[3] = ki->dk.ipv6[3]; + + my_vrfmap_index = vrf_map_array[u_ki.dk.ipv4_key.k.vrf]; + my_vrfmap = cnat_map_by_vrf + my_vrfmap_index; + + pm = dslite_entry_ptr->portmap_list; + + /* + * check if src vrf, src ip addr is already + * in the user db + * if yes, use PORT_ALLOC_DIRECTED + * if no, use PORT_ALLOC_ANY since it is first time + */ + udb = dslite_user_db_lookup_entry(&u_ki); + if (PREDICT_TRUE(udb)) { + /* + * check if main db has space to accomodate new entry + */ + h = pool_header(cnat_main_db); + + free_main = vec_len(h->free_indices) - 1; + if (PREDICT_FALSE(!free_main)) { + info->error = CNAT_MAIN_DB_LIMIT; + nat44_dslite_common_stats[dslite_id].in2out_drops_port_limit_exceeded ++; + DSLITE_DEBUG_INSIDE_ERR(CNAT_MAIN_DB_LIMIT) +#ifdef UT_TEST_CODE + printf("Limit reached : OLD USER"); +#endif + spp_printf(CNAT_MAIN_DB_LIMIT_ERROR, 0, 0); + return NULL; + } + + /* + * allocate port, from existing mapping + */ + my_index = udb->portmap_index; + my_pm = pm + my_index; + /* It is quite possible that we hit the scenario of CSCtj17774. + * Delete all the main db entries and add the ipv4 address sent by + * CGN-MA as Static port alloc any + */ + + if (PREDICT_FALSE(my_pm->ipv4_address != ko->k.k.ipv4)) { + if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) { + printf("Delete Main db entry and check for" + " ipv4 address sanity pm add = 0x%x ip add = 0x%x\n", + my_pm->ipv4_address, ko->k.k.ipv4); + } + do { + /* udb is not NULL when we begin with for sure */ + head = udb->translation_list_head_index; + db = cnat_main_db + head; + cnat_delete_main_db_entry_v2(db); + } while (!pool_is_free(cnat_user_db, udb)); + + rv = cnat_mapped_static_port_alloc_v2_bulk (pm, + PORT_ALLOC_ANY, &my_index, ko->k.k.ipv4, ko->k.k.port, + udb, BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), &nfv9_log_req, + my_vrfmap->ip_n_to_1); + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + info->error = rv; + nat44_dslite_common_stats[dslite_id].in2out_drops_port_limit_exceeded ++; + DSLITE_DEBUG_INSIDE_ERR(rv) + return (NULL); + } + /* + * create entry in user db + */ + udb = dslite_user_db_create_entry(&u_ki, my_index); + nat44_dslite_common_stats[dslite_id].num_subscribers++; +#ifndef NO_BULK_LOGGING + if(PREDICT_FALSE(udb && (BULK_ALLOC_NOT_ATTEMPTED != nfv9_log_req))) { + cnat_update_bulk_range_cache(udb, ko->k.k.port, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr)); + } +#endif /* #ifndef NO_BULK_LOGGING */ + } else { + + rv = cnat_mapped_static_port_alloc_v2_bulk (pm, + PORT_ALLOC_DIRECTED, &my_index, ko->k.k.ipv4, ko->k.k.port, + udb, BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), &nfv9_log_req, + my_vrfmap->ip_n_to_1); + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + info->error = rv; + DSLITE_DEBUG_INSIDE_ERR(rv) + log_port_alloc_error(rv, &(ki->dk.ipv4_key)); + return (NULL); + } + + /* + * increment port in use for this user + */ + udb->ntranslations += 1; + } + } else { + if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) { + printf ("Static port alloc any\n"); + } + /* + * first time allocate port for this user + */ + + /* + * Check if main db has space for new entry + * Allowing a user db entry to be created if main db is not 
free + * will cause a port to be allocated to that user, which results in + * wastage of that port, hence the check is done here. + */ + h = pool_header(cnat_main_db); + free_main = vec_len(h->free_indices) - 1; + h = pool_header(cnat_user_db); + free_user = vec_len(h->free_indices) - 1; + + /* + * If either main_db or user_db does not have entries + * bail out, with appropriate error + */ + if (PREDICT_FALSE(!(free_main && free_user))) { + u32 log_error; + if(free_main) { + info->error = CNAT_USER_DB_LIMIT; + log_error = CNAT_USER_DB_LIMIT_ERROR; + } else { + info->error = CNAT_MAIN_DB_LIMIT; + log_error = CNAT_MAIN_DB_LIMIT_ERROR; + } + nat44_dslite_common_stats[dslite_id].in2out_drops_port_limit_exceeded ++; + DSLITE_DEBUG_INSIDE_ERR(info->error) + spp_printf(log_error, 0, 0); + return NULL; + } + + rv = cnat_mapped_static_port_alloc_v2_bulk (pm, + PORT_ALLOC_ANY, &my_index, ko->k.k.ipv4, ko->k.k.port, + udb, BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), &nfv9_log_req, + my_vrfmap->ip_n_to_1); + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + info->error = rv; + nat44_dslite_common_stats[dslite_id].in2out_drops_port_limit_exceeded ++; + DSLITE_DEBUG_INSIDE_ERR(rv) + log_port_alloc_error(rv, &(ki->dk.ipv4_key)); + return (NULL); + } + /* + * create entry in user db + */ + udb = dslite_user_db_create_entry(&u_ki, my_index); + nat44_dslite_common_stats[dslite_id].num_subscribers++; +#ifndef NO_BULK_LOGGING + if(PREDICT_FALSE(udb && (BULK_ALLOC_NOT_ATTEMPTED != nfv9_log_req))) { + cnat_update_bulk_range_cache(udb, ko->k.k.port, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr)); + } +#endif /* #ifndef NO_BULK_LOGGING */ + } + + /* + * step 3: + * outside port is allocated for this src vrf/src ip addr + * 1)create a new entry in main db + * 2)setup cnat_out2in_hash key + * 3)setup cnat_in2out_hash key + */ + db = dslite_create_main_db_entry_and_hash(ki, ko, udb); + db->dslite_nat44_inst_id = dslite_id; + nat44_dslite_common_stats[dslite_id].active_translations++; + dslite_translation_create_count++; + + /* + * don't forget logging + * logging API is unconditional, + * logging configuration check is done inside the inline function + */ +#if 0 /* TBD - NEED TO DECIDE ON LOGGING */ + if(PREDICT_FALSE(nfv9_log_req != CACHE_ALLOC_NO_LOG_REQUIRED)) { + /* if session logging is enabled .. 
do not log as there is no + * traffic yet + */ +#endif /* #if 0 - this has to be removed later */ + + return db; +} +#endif /* TOBE_PORTED */ + + +/* Per port/ip timeout related routines */ +static +u32 cnat_timeout_db_hash_lookup (cnat_key_t t_key) +{ + cnat_key_t key; + u64 a, b, c; + u32 index; + cnat_timeout_db_entry_t *db; + + key.k.ipv4 = t_key.k.ipv4; + key.k.port = t_key.k.port; + key.k.vrf = t_key.k.vrf; + + CNAT_V4_GET_HASH(key.key64, + index, CNAT_TIMEOUT_HASH_MASK) + + + index = cnat_timeout_hash[index].next; + + if (PREDICT_FALSE(index == EMPTY)) + return EMPTY; + + do { + db = cnat_timeout_db + index; + if (PREDICT_TRUE((db->t_key.timeout_key.key64 & CNAT_TIMEOUT_FULL_MASK) + == (key.key64 & CNAT_TIMEOUT_FULL_MASK))) + break; + index = db->t_hash.next; + } while (index != EMPTY); + + return index; +} + +/* Pass db_type as MAIN_DB_TYPE if you are passing + * cnat_main_db_entry_t * casted as void * for db + * else pass db_type as SESSION_DB_TYPE + */ +u16 +query_and_update_db_timeout(void *db, u8 db_type) +{ + cnat_key_t t_search_key; + u32 index; + cnat_timeout_db_entry_t *timeout_db_entry; + pool_header_t *h; + u32 free; + + cnat_main_db_entry_t *mdb = NULL; + cnat_session_entry_t *sdb = NULL; + + if(PREDICT_TRUE(db_type == MAIN_DB_TYPE)) { + mdb = (cnat_main_db_entry_t *)db; + } else if(db_type == SESSION_DB_TYPE) { + sdb = (cnat_session_entry_t *)db; + } else { + return 0; + } + + h = pool_header(cnat_timeout_db); + free = vec_len(h->free_indices) - 1; + + if(free == CNAT_TIMEOUT_HASH_SIZE) { + /* No timeout db configured */ + return 0; + } + + /* First search for ip/port pair */ + if(PREDICT_TRUE(db_type == MAIN_DB_TYPE)) { + t_search_key.k.ipv4 = mdb->dst_ipv4; + t_search_key.k.port = mdb->dst_port; + t_search_key.k.vrf = mdb->in2out_key.k.vrf; + } else { + t_search_key.k.ipv4 = sdb->v4_dest_key.k.ipv4; + t_search_key.k.port = sdb->v4_dest_key.k.port; + t_search_key.k.vrf = sdb->v4_dest_key.k.vrf; + } + + index = cnat_timeout_db_hash_lookup(t_search_key); + + if(index == EMPTY) { + /* Search for port map */ + t_search_key.k.ipv4 = 0; + + index = cnat_timeout_db_hash_lookup(t_search_key); + + if(index == EMPTY) { + /* Search for ip only map */ + if(PREDICT_TRUE(db_type == MAIN_DB_TYPE)) { + t_search_key.k.ipv4 = mdb->dst_ipv4; + } else { + t_search_key.k.ipv4 = sdb->v4_dest_key.k.ipv4; + } + t_search_key.k.port = 0; + + index = cnat_timeout_db_hash_lookup(t_search_key); + if(index != EMPTY) { +#ifdef DEBUG_PRINTF_ENABLED + printf("%s: ip only map sucess\n","query_and_update_db_timeout"); +#endif + } + } else { +#ifdef DEBUG_PRINTF_ENABLED + printf("%s: port only map sucess\n", "query_and_update_db_timeout"); +#endif + } + + } else { +#ifdef DEBUG_PRINTF_ENABLED + printf("%s: ip port map sucess\n","query_and_update_db_timeout"); +#endif + + } + + if(index == EMPTY) { + /* No match found, clear timeout */ + if(PREDICT_TRUE(db_type == MAIN_DB_TYPE)) { + mdb->timeout = 0; + } else { + sdb->timeout = 0; + } +#ifdef DEBUG_PRINTF_ENABLED + printf("%s: No match\n","query_and_update_db_timeout"); +#endif + } else { + /* Match found, update timeout */ + timeout_db_entry = cnat_timeout_db + index; + if(PREDICT_TRUE(db_type == MAIN_DB_TYPE)) { + mdb->timeout = timeout_db_entry->t_key.timeout_value; + } else { + sdb->timeout = timeout_db_entry->t_key.timeout_value; + } + return timeout_db_entry->t_key.timeout_value; + } + return 0; +} + + + +static +void cnat_timeout_db_hash_add (cnat_timeout_db_entry_t *t_entry) +{ + cnat_key_t key; + u64 a, b, c; + u32 index, bucket; + cnat_key_t 
t_key = t_entry->t_key.timeout_key; + + key.k.ipv4 = t_key.k.ipv4; + key.k.port = t_key.k.port; + key.k.vrf = t_key.k.vrf; + + CNAT_V4_GET_HASH(key.key64, + bucket, CNAT_TIMEOUT_HASH_MASK) + + + index = cnat_timeout_hash[bucket].next; + + /* Add this db entry to the head of the bucket chain */ + t_entry->t_hash.next = index; + cnat_timeout_hash[bucket].next = t_entry - cnat_timeout_db; +} + + + +u16 +cnat_timeout_db_create (cnat_timeout_t t_entry) +{ + cnat_timeout_db_entry_t *db; + cnat_key_t t_key = t_entry.timeout_key; + u32 db_index; + + pool_header_t *h; + u32 free; + + db_index = cnat_timeout_db_hash_lookup(t_key); + + if(db_index != EMPTY) { + /* Entry already exists. Check if it is replay or update */ + db = cnat_timeout_db + db_index; + db->t_key.timeout_value = t_entry.timeout_value; + return CNAT_SUCCESS; + } + + h = pool_header(cnat_timeout_db); + free = vec_len(h->free_indices) - 1; + + if(free == 0) { + return CNAT_OUT_LIMIT; + } + + + pool_get(cnat_timeout_db, db); + ASSERT(db); + + memset(db, 0, sizeof(*db)); + + db_index = db - cnat_timeout_db; + + db->t_key.timeout_key.k.ipv4 = t_key.k.ipv4; + db->t_key.timeout_key.k.port = t_key.k.port; + db->t_key.timeout_key.k.vrf = t_key.k.vrf; + db->t_key.timeout_value = t_entry.timeout_value; + + + cnat_timeout_db_hash_add(db); + return CNAT_SUCCESS; +} + +void cnat_timeout_db_delete(cnat_key_t t_key) +{ + cnat_key_t key; + u64 a, b, c; + u32 index, bucket; + cnat_timeout_db_entry_t *this, *prev; + + key.k.ipv4 = t_key.k.ipv4; + key.k.port = t_key.k.port; + key.k.vrf = t_key.k.vrf; + + + CNAT_V4_GET_HASH(key.key64, + bucket, CNAT_TIMEOUT_HASH_MASK) + + + index = cnat_timeout_hash[bucket].next; + + if(index == EMPTY) return; + + prev = 0; + do { + this = cnat_timeout_db + index; + if (PREDICT_TRUE( + (this->t_key.timeout_key.key64 & CNAT_TIMEOUT_FULL_MASK) == + (key.key64 & CNAT_TIMEOUT_FULL_MASK))) { + if (prev == 0) { + cnat_timeout_hash[bucket].next = this->t_hash.next; + goto found; + } else { + prev->t_hash.next = this->t_hash.next; + goto found; + } + } + + prev = this; + index = this->t_hash.next; + } while (index != EMPTY); + + if(index == EMPTY) return; + + found: + pool_put(cnat_timeout_db, this); + +} + +void cnat_session_db_hash_delete (cnat_session_entry_t *ep) +{ + u32 a, b, c; + u32 index, bucket; + cnat_session_entry_t *this, *prev; + + CNAT_V4_GET_SESSION_HASH(ep->main_db_index, ep->v4_dest_key.k.ipv4, + ep->v4_dest_key.k.port, ep->v4_dest_key.k.vrf, bucket, + CNAT_SESSION_HASH_MASK) + + + index = cnat_session_hash[bucket].next; + + ASSERT(index != EMPTY); + + prev = 0; + do { + this = cnat_session_db + index; + if (PREDICT_TRUE(this == ep)) { + if (prev == 0) { + cnat_session_hash[bucket].next = + ep->cnat_session_hash.next; + return; + } else { + prev->cnat_session_hash.next = + ep->cnat_session_hash.next; + return; + } + } + prev = this; + index = this->cnat_session_hash.next; + } while (index != EMPTY); + + ASSERT(0); + +} + +cnat_session_entry_t * +cnat_session_db_edm_lookup_entry(cnat_key_t *ko,u32 session_head_index, + u32 main_db_index) +{ + u32 index; + cnat_session_entry_t *db; + + + index = session_head_index; + if (PREDICT_TRUE(index == EMPTY)) { + return (NULL); + } + + do { + db = cnat_session_db + index; + if(PREDICT_TRUE((db->main_db_index == main_db_index) && + (db->v4_dest_key.k.vrf == ko->k.vrf) && + (db->v4_dest_key.k.ipv4 == ko->k.ipv4))) { + + return db; + } + index = db->cnat_session_hash.next; + } while (index != EMPTY); + + return (NULL); +} + + + +cnat_session_entry_t * 
+cnat_session_db_lookup_entry(cnat_key_t *ko,u32 main_db_index) +{ + u32 a, b, c; + u32 index, bucket; + cnat_session_entry_t *db; + + CNAT_V4_GET_SESSION_HASH(main_db_index, ko->k.ipv4, ko->k.port, + ko->k.vrf, bucket, CNAT_SESSION_HASH_MASK) + + + index = cnat_session_hash[bucket].next; + if (PREDICT_TRUE(index == EMPTY)) { + return (NULL); + } + + do { + db = cnat_session_db + index; + if(PREDICT_TRUE((db->main_db_index == main_db_index) && + (db->v4_dest_key.k.vrf == ko->k.vrf) && + (db->v4_dest_key.k.port == ko->k.port) && + (db->v4_dest_key.k.ipv4 == ko->k.ipv4))) { + + return db; + } + index = db->cnat_session_hash.next; + } while (index != EMPTY); + + return (NULL); +} + +cnat_session_entry_t * +cnat_create_session_db_entry(cnat_key_t *ko, + cnat_main_db_entry_t *bdb, u8 log) +{ + u32 a, b, c; + u32 db_index, bucket_out; + cnat_session_entry_t *db = NULL; + pool_header_t *h; + u32 free_session; + u16 instance; + + db = cnat_session_db_lookup_entry(ko, bdb - cnat_main_db); + if (PREDICT_FALSE(db != NULL)) { + /*printf("Create Session - Entry already Exists\n");*/ + return db; + } + + h = pool_header(cnat_session_db); + free_session = vec_len(h->free_indices) - 1; + + if (bdb->flags & CNAT_DB_DSLITE_FLAG) { + instance = bdb->dslite_nat44_inst_id; + } else { + instance = NAT44_RESERVED_INST_ID; + } + + if (PREDICT_FALSE(!free_session)) { + nat44_dslite_common_stats[instance].drops_sessiondb_limit_exceeded++; + return NULL; + } + + if( PREDICT_FALSE(bdb->nsessions == CNAT_MAX_SESSIONS_PER_BIB)) { + /* printf("Create Session - Max sessions per BIB reached\n"); */ + return NULL; + } + + pool_get(cnat_session_db, db); + memset(db, 0, sizeof(*db)); + + db_index = db - cnat_session_db; + db->v4_dest_key.k.port = ko->k.port; + db->v4_dest_key.k.ipv4 = ko->k.ipv4; + db->v4_dest_key.k.vrf = ko->k.vrf; + + db->main_list.next = db_index; + db->main_list.prev = db_index; + db->main_db_index = bdb - cnat_main_db; + + db->tcp_seq_num = 0; + db->ack_no = 0; + db->window = 0; + + if(PREDICT_FALSE(log)) { + bdb->nsessions++; + query_and_update_db_timeout(db, SESSION_DB_TYPE); + } + + if (PREDICT_FALSE(bdb->nsessions == 1)) { + /* + * first port for this src vrf/src ip addr + */ + bdb->session_head_index = db_index; + } else { + index_dlist_addtail(bdb->session_head_index, + (u8 *)cnat_session_db, sizeof(cnat_session_db[0]), + STRUCT_OFFSET_OF(cnat_session_entry_t, main_list), + db_index); + } + + /* + * setup o2i hash key + */ + CNAT_V4_GET_SESSION_HASH(db->main_db_index, ko->k.ipv4, ko->k.port, + ko->k.vrf, bucket_out, CNAT_SESSION_HASH_MASK) + + + db->cnat_session_hash.next = + cnat_session_hash[bucket_out].next; + cnat_session_hash[bucket_out].next = db_index; + + + if(PREDICT_FALSE(log)) { + if (bdb->flags & CNAT_DB_DSLITE_FLAG) { + cnat_session_log_ds_lite_mapping_create(bdb, + (dslite_table_db_ptr + instance),db); + } else { + cnat_vrfmap_t *my_vrfmap = cnat_map_by_vrf + bdb->vrfmap_index; + cnat_session_log_nat44_mapping_create(bdb, db, my_vrfmap); + } + } + + /* Need to set entry_expires here, as we need to override 0 check for + newly established sessions */ + db->entry_expires = cnat_current_time; + nat44_dslite_common_stats[instance].sessions++; + return db; +} + +void +cnat_dest_update_main2session(cnat_main_db_entry_t *mdb, + cnat_session_entry_t *sdb) +{ + + sdb->flags = mdb->flags; + sdb->timeout = mdb->timeout; + sdb->entry_expires = mdb->entry_expires; + sdb->alg.delta = mdb->alg.delta; + sdb->tcp_seq_num = mdb->proto_data.seq_pcp.tcp_seq_num; + + /* Reset Main db values to 0 */ + /* 
Reset only session specific flags */ + mdb->flags &= ~(CNAT_DB_FLAG_TCP_ACTIVE | CNAT_DB_FLAG_UDP_ACTIVE + | CNAT_DB_FLAG_ALG_ENTRY | CNAT_DB_FLAG_ALG_CTRL_FLOW); + mdb->timeout = 0; + mdb->entry_expires = 0; + mdb->alg.delta = 0; + if(PREDICT_FALSE(!((mdb->flags & CNAT_DB_FLAG_PPTP_TUNNEL_ACTIVE) || + (mdb->flags & CNAT_DB_FLAG_PPTP_TUNNEL_INIT)))) { + mdb->proto_data.seq_pcp.tcp_seq_num = 0; + } + + mdb->dst_ipv4 = 0; + mdb->dst_port = 0; +} + + +void +cnat_dest_update_session2main(cnat_main_db_entry_t *mdb, + cnat_session_entry_t *sdb) +{ + + u16 flags = sdb->flags & (CNAT_DB_FLAG_TCP_ACTIVE | + CNAT_DB_FLAG_UDP_ACTIVE | CNAT_DB_FLAG_ALG_ENTRY | + CNAT_DB_FLAG_ALG_CTRL_FLOW); + mdb->flags |= flags; + mdb->timeout = sdb->timeout; + mdb->entry_expires = sdb->entry_expires; + mdb->alg.delta = sdb->alg.delta; + if(PREDICT_FALSE(!((mdb->flags & CNAT_DB_FLAG_PPTP_TUNNEL_ACTIVE) || + (mdb->flags & CNAT_DB_FLAG_PPTP_TUNNEL_INIT)))) { + mdb->proto_data.seq_pcp.tcp_seq_num = sdb->tcp_seq_num; + } + mdb->dst_ipv4 = sdb->v4_dest_key.k.ipv4; + mdb->dst_port = sdb->v4_dest_key.k.port; +} + +void cnat_delete_session_db_entry (cnat_session_entry_t *ep, u8 log) +{ + u32 session_db_index; + u32 bdb_len; + cnat_main_db_entry_t *be =0; + cnat_session_entry_t *sdb_last = NULL; + u16 instance; + + if (PREDICT_FALSE(ep->flags & CNAT_DB_NAT64_FLAG) != 0) { + /* Preventive check - Not a NAT44 entry */ + return; + } + + pool_header_t *h = pool_header(cnat_main_db); + + /* Validate .. just in case we are trying to delete a non existing one */ + bdb_len = vec_len(cnat_main_db); + + /* In case of invalid user just return, deleting only main db + * is not a good idea, since some valid user db entry might be pointing + * to that main db and hence leave the dbs in a inconsistent state + */ + if (PREDICT_FALSE((ep->main_db_index >= bdb_len) || + (clib_bitmap_get(h->free_bitmap, ep->main_db_index)))) { +#ifdef DEBUG_PRINTF_ENABLED + printf("invalid/unused user index in db %d\n", ep->main_db_index); +#endif + spp_printf(CNAT_INV_UNUSED_USR_INDEX, 1, (u32 *) &(ep->main_db_index)); + return; + } + + be = cnat_main_db + ep->main_db_index; + + session_db_index = ep - cnat_session_db; + + be->session_head_index = index_dlist_remelem ( + be->session_head_index, (u8 *)cnat_session_db, + sizeof (cnat_session_db[0]), + STRUCT_OFFSET_OF(cnat_session_entry_t, main_list), + session_db_index); + + if (be->flags & CNAT_DB_DSLITE_FLAG) { + instance = be->dslite_nat44_inst_id; + } else { + instance = NAT44_RESERVED_INST_ID; + } + + if(PREDICT_TRUE(log)) { + if (be->flags & CNAT_DB_DSLITE_FLAG) { + cnat_session_log_ds_lite_mapping_delete(be, + (dslite_table_db_ptr + instance),ep); + } else { + cnat_vrfmap_t *my_vrfmap = cnat_map_by_vrf + be->vrfmap_index; + cnat_session_log_nat44_mapping_delete(be, ep, my_vrfmap); + } + be->nsessions--; + } + + if (PREDICT_FALSE(be->nsessions == 1 && log)) { + /* There is only 1 session left + * Copy the info back to main db and release the last + * existing session + */ + + sdb_last = cnat_session_db + be->session_head_index; + ASSERT(sdb_last != NULL); + + cnat_dest_update_session2main(be, sdb_last); + cnat_delete_session_db_entry(sdb_last, FALSE); + } + + /* Remove from session DB hashes */ + cnat_session_db_hash_delete(ep); + nat44_dslite_common_stats[instance].sessions--; + + pool_put(cnat_session_db, ep); +} + +cnat_main_db_entry_t* +dslite_main_db_lookup_entry(dslite_db_key_bucket_t *ki) +{ + u64 a, b, c; + u32 index; + cnat_main_db_entry_t *db; + cnat_user_db_entry_t *userdb; + + 
DSLITE_V6_GET_HASH((&(ki->dk)), + ki->bucket, + CNAT_MAIN_HASH_MASK); + + DSLITE_PRINTF(1,"MDBLU hash..%u\n", ki->bucket); + + index = cnat_in2out_hash[ki->bucket].next; + if (PREDICT_TRUE(index == EMPTY)) { + DSLITE_PRINTF(1,"MDBLU index MT..\n"); + return (NULL); + } + + do { +/* We can add a flag here to indicate if the db entry is for nat44 or + * dslite. If the db entry is for nat44 then we can simply move to the + * one. + */ + db = cnat_main_db + index; + userdb = cnat_user_db + db->user_index; + if (PREDICT_TRUE(db->in2out_key.key64 == ki->dk.ipv4_key.key64) + && userdb->ipv6[0] == ki->dk.ipv6[0] + && userdb->ipv6[1] == ki->dk.ipv6[1] + && userdb->ipv6[2] == ki->dk.ipv6[2] + && userdb->ipv6[3] == ki->dk.ipv6[3]) { + DSLITE_PRINTF(1,"MDBLU success..%u\n", index); + return db; + } + index = db->in2out_hash.next; + } while (index != EMPTY); + + DSLITE_PRINTF(1,"MDBLU Entry does not exist..\n"); + return (NULL); +} + +cnat_user_db_entry_t* +dslite_user_db_lookup_entry(dslite_db_key_bucket_t *uki) +{ + u64 a, b, c; + u32 index; + cnat_user_db_entry_t *udb=NULL; + + DSLITE_V6_GET_HASH((&(uki->dk)), + uki->bucket, + CNAT_USER_HASH_MASK) + + DSLITE_PRINTF(1,"UDBLU hash..%u\n", uki->bucket); + + /* now: index in user vector */ + index = cnat_user_hash[uki->bucket].next; + if (PREDICT_TRUE(index != EMPTY)) { + DSLITE_PRINTF(1,"UDBLU hash table entry not MT..\n"); + do { + udb = cnat_user_db + index; + if (PREDICT_FALSE(udb->key.key64 == uki->dk.ipv4_key.key64) + && udb->ipv6[0] == uki->dk.ipv6[0] + && udb->ipv6[1] == uki->dk.ipv6[1] + && udb->ipv6[2] == uki->dk.ipv6[2] + && udb->ipv6[3] == uki->dk.ipv6[3]) { + DSLITE_PRINTF(1,"UDBLU success..%u\n", index); + return udb; + } + index = udb->user_hash.next; + } while (index != EMPTY); + } + DSLITE_PRINTF(1,"UDBLU Entry doesnt exist..\n"); + return (NULL); +} + +cnat_user_db_entry_t* +dslite_user_db_create_entry(dslite_db_key_bucket_t *uki, + u32 portmap_index) +{ + cnat_user_db_entry_t *udb = NULL; + + pool_get(cnat_user_db, udb); + memset(udb, 0, sizeof(*udb)); + + udb->ntranslations = 1; + udb->portmap_index = portmap_index; +// udb->key.key64 = uki->k.key64; + + udb->key.key64 = uki->dk.ipv4_key.key64; + udb->ipv6[0] = uki->dk.ipv6[0]; + udb->ipv6[1] = uki->dk.ipv6[1]; + udb->ipv6[2] = uki->dk.ipv6[2]; + udb->ipv6[3] = uki->dk.ipv6[3]; + + udb->flags |= CNAT_USER_DB_DSLITE_FLAG; + /* Add this user to the head of the bucket chain */ + udb->user_hash.next = + cnat_user_hash[uki->bucket].next; + cnat_user_hash[uki->bucket].next = udb - cnat_user_db; + +#ifndef NO_BULK_LOGGING + INIT_BULK_CACHE(udb) +#endif /* NO_BULK_LOGGING */ + + return udb; +} + +#ifndef TOBE_PORTED +cnat_main_db_entry_t* +dslite_create_main_db_entry_and_hash(dslite_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + cnat_user_db_entry_t *udb) +{ + return 0; +} +#else +cnat_main_db_entry_t* +dslite_create_main_db_entry_and_hash(dslite_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + cnat_user_db_entry_t *udb) +{ + u64 a, b, c; + u32 db_index; + cnat_main_db_entry_t *db = NULL; + + pool_get(cnat_main_db, db); + memset(db, 0, sizeof(*db)); + + db_index = db - cnat_main_db; + db->in2out_key.k.ipv4 = ki->dk.ipv4_key.k.ipv4; + db->in2out_key.k.port = ki->dk.ipv4_key.k.port; + db->in2out_key.k.vrf = ki->dk.ipv4_key.k.vrf; + db->out2in_key.k.ipv4 = ko->k.k.ipv4; + db->out2in_key.k.port = ko->k.k.port; + db->out2in_key.k.vrf = ko->k.k.vrf; + + db->user_ports.next = db_index; + db->user_ports.prev = db_index; + db->user_index = udb - cnat_user_db; + //db->portmap_index = 
udb->portmap_index; + db->flags |= CNAT_DB_DSLITE_FLAG; + + if (PREDICT_FALSE(udb->ntranslations == 1)) { + /* + * first port for this src vrf/src ip addr + */ + udb->translation_list_head_index = db_index; + DSLITE_PRINTF(1,"First translation of this user..\n"); + } else { + index_dlist_addtail(udb->translation_list_head_index, + (u8 *)cnat_main_db, sizeof(cnat_main_db[0]), + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports), + db_index); + } + + /* + * setup o2i hash key + */ + CNAT_V4_GET_HASH(ko->k.key64, + ko->bucket, + CNAT_MAIN_HASH_MASK) + db->out2in_hash.next = cnat_out2in_hash[ko->bucket].next; + cnat_out2in_hash[ko->bucket].next = db_index; + /* + * setup i2o hash key, bucket is already calculate + */ + db->in2out_hash.next = cnat_in2out_hash[ki->bucket].next; + cnat_in2out_hash[ki->bucket].next = db_index; + + DSLITE_PRINTF(1,"Create main db and hash..%u %u %u %u %x\n", + ki->bucket, ko->bucket, + db_index, db->user_index, ko->k.key64); + +#if DEBUG > 1 + printf("\nMy_Instance_Number %d: Bucket %d, Db_Index %d", + my_instance_number, ki->bucket, db_index); + printf("\nInside (VRF 0x%x, IP 0x%x, PORT 0x%x)", + db->in2out_key.k.vrf, db->in2out_key.k.ipv4, db->in2out_key.k.port); + printf("\nOutside (VRF 0x%x, IP 0x%x, PORT 0x%x)", + db->out2in_key.k.vrf, db->out2in_key.k.ipv4, db->out2in_key.k.port); + printf("\nUser Index %d, IP 0x%x", + db->user_index, udb->key.k.ipv4); +#endif + + //nat44_dslite_common_stats[DSLITE_COMMON_STATS].active_translations++; + + return db; +} + +static inline void handle_dslite_port_exceeded_logging( + cnat_user_db_entry_t *udb, + dslite_key_t * key, + dslite_table_entry_t *dslite_entry_ptr) +{ + + if(PREDICT_TRUE(udb->flags & CNAT_USER_DB_PORT_LIMIT_EXCEEDED)) { + /* Already logged ..*/ + return; + } + + /* else, set the flag and call the log API */ + udb->flags = udb->flags | CNAT_USER_DB_PORT_LIMIT_EXCEEDED; + cnat_log_ds_lite_port_limit_exceeded(key, dslite_entry_ptr); + return; +} +#endif + +inline void handle_cnat_port_exceeded_logging( + cnat_user_db_entry_t *udb, + cnat_key_t * key, + cnat_vrfmap_t *vrfmap) +{ + + if(PREDICT_TRUE(udb->flags & CNAT_USER_DB_PORT_LIMIT_EXCEEDED)) { + /* Already logged ..*/ + return; + } + + /* else, set the flag and call the log API */ + udb->flags = udb->flags | CNAT_USER_DB_PORT_LIMIT_EXCEEDED; + cnat_log_nat44_port_limit_exceeded(key,vrfmap); + return; +} + +#ifndef TOBE_PORTED +cnat_main_db_entry_t* +dslite_get_main_db_entry_v2(dslite_db_key_bucket_t *ki, + port_pair_t port_pair_type, + port_type_t port_type, + cnat_gen_icmp_info *info, + dslite_table_entry_t *dslite_entry_ptr, + cnat_key_t *dest_info) +{ + return 0; +} +#else +/* + * this function is called by exception node + * when lookup is fialed in i2o node + * + * if reash per user port limit, + * set user_db_entry pointer, and error == CNAT_OUT_LIMIT + */ +cnat_main_db_entry_t* +dslite_get_main_db_entry_v2(dslite_db_key_bucket_t *ki, + port_pair_t port_pair_type, + port_type_t port_type, + cnat_gen_icmp_info *info, + dslite_table_entry_t *dslite_entry_ptr, + cnat_key_t *dest_info) +{ + u16 protocol; + cnat_errno_t rv; + dslite_db_key_bucket_t u_ki; + cnat_db_key_bucket_t ko; + u32 my_index, free_main, free_user; + u32 current_timestamp; + cnat_vrfmap_t *my_vrfmap =0; + u16 my_vrfmap_index; + cnat_portmap_v2_t *pm =0; + cnat_user_db_entry_t *udb = 0; + cnat_main_db_entry_t *db = 0; + pool_header_t *h; + u16 dslite_id = dslite_entry_ptr->dslite_id; + +#ifndef NO_BULK_LOGGING + int nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; +#endif + /* + * need to try 
lookup again because + * second pkt may come here before the entry is created + * by receiving first pkt due to high line rate. + */ + info->gen_icmp_msg = CNAT_NO_ICMP_MSG; + info->error = CNAT_SUCCESS; + db = dslite_main_db_lookup_entry(ki); + if (PREDICT_TRUE(db)) { + /* what if the source is talking to a + * new dest now? We will have to handle this case and + * take care of - creating session db and logging + */ + if(PREDICT_FALSE((!dest_info->k.ipv4) && (!dest_info->k.port))) { + return db; /* if dest_info is null don't create session */ + } + + if(PREDICT_TRUE((db->dst_ipv4 == dest_info->k.ipv4) && + (db->dst_port == dest_info->k.port))) { + return db; + } + dest_info->k.vrf = db->in2out_key.k.vrf; + /* Src is indeed talking to a different dest */ + cnat_session_entry_t *session_db2 = NULL; + if(PREDICT_TRUE(db->nsessions == 1)) { + session_db2 = cnat_handle_1to2_session(db, dest_info); + if(PREDICT_TRUE(session_db2 != NULL)) { + CNAT_DB_TIMEOUT_RST(session_db2); + return db; + } else { + info->error = CNAT_ERR_NO_SESSION_DB; + return NULL; + } + } else if(PREDICT_FALSE(db->nsessions == 0)) { + /* Should be static entry.. should never happen + */ + if(PREDICT_TRUE(dest_info->k.ipv4 != 0)) { + cnat_add_dest_n_log(db, dest_info); + } + return db; + } else { + /* The src has already created multiple sessions.. very rare + */ + session_db2 = cnat_create_session_db_entry(dest_info, + db, TRUE); + if(PREDICT_TRUE(session_db2 != NULL)) { + CNAT_DB_TIMEOUT_RST(session_db2); + return db; + } else { + info->error = CNAT_ERR_NO_SESSION_DB; + return NULL; + } + } + + } + + /* + * step 1. check if outside vrf is configured or not + * and Find the set of portmaps for the outside vrf + * insider vrf is one to one mappted to outside vrf + * key is vrf and ip only + * ki.k.k.vrf has protocol bits, mask out + */ + protocol = ki->dk.ipv4_key.k.vrf & CNAT_PRO_MASK; + u_ki.dk.ipv4_key.k.vrf = ki->dk.ipv4_key.k.vrf & CNAT_VRF_MASK; +#ifdef DSLITE_USER_IPV4 + u_ki.dk.ipv4_key.k.ipv4 = ki->dk.ipv4_key.k.ipv4; +#else + /* + * Inside ipv4 address should be masked, if port limit + * need to be done at B4 element level. 
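+     * With the inside IPv4 address zeroed, the user db key reduces to the
+     * VRF and the B4 IPv6 address, so all hosts behind one B4 share a
+     * single user entry and hence a single port quota.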
+ */ + u_ki.dk.ipv4_key.k.ipv4 = 0; +#endif + u_ki.dk.ipv4_key.k.port = 0; + + u_ki.dk.ipv6[0] = ki->dk.ipv6[0]; + u_ki.dk.ipv6[1] = ki->dk.ipv6[1]; + u_ki.dk.ipv6[2] = ki->dk.ipv6[2]; + u_ki.dk.ipv6[3] = ki->dk.ipv6[3]; + + my_vrfmap_index = vrf_map_array[u_ki.dk.ipv4_key.k.vrf]; + my_vrfmap = cnat_map_by_vrf + my_vrfmap_index; +/* Checking if the inst entry is active or not is done much earlier + */ +#if 0 + my_vrfmap_index = vrf_map_array[u_ki.k.k.vrf]; + my_vrfmap = cnat_map_by_vrf + my_vrfmap_index; + my_vrfmap_entry_found = ((my_vrfmap_index != VRF_MAP_ENTRY_EMPTY) && + (my_vrfmap->status == S_RUN) && + (my_vrfmap->i_vrf == u_ki.k.k.vrf)); + + if (PREDICT_FALSE(!my_vrfmap_entry_found)) { + u32 arr[] = {ki->k.k.vrf, ki->k.k.ipv4, ki->k.k.port}; + if ((my_vrfmap_index == VRF_MAP_ENTRY_EMPTY) || + (my_vrfmap->i_vrf == u_ki.k.k.vrf)) { + info->error = CNAT_NO_CONFIG; + CNAT_DEBUG_INSIDE_ERR(CNAT_NO_CONFIG) + spp_printf(CNAT_NO_CONFIG_ERROR, 3, arr); + } else { + info->error = CNAT_NO_VRF_RUN; + CNAT_DEBUG_INSIDE_ERR(CNAT_NO_VRF_RUN) + spp_printf(CNAT_NO_VRF_RUN_ERROR, 3, arr); + } + + return (NULL); + } +#endif +/* + dslite_inst_ptr = dslite_nat44_config_table[dslite_inst_id]; +*/ + pm = dslite_entry_ptr->portmap_list; + //pm = my_vrfmap->portmap_list; + + /* + * set o2i key with protocl bits + */ + ko.k.k.vrf = dslite_entry_ptr->o_vrf | protocol; + //ko.k.k.vrf = my_vrfmap->o_vrf | protocol; + + /* + * step 2. check if src vrf, src ip addr is alreay + * in the user db + * if yes, use PORT_ALLOC_DIRECTED + * if no, use PORT_ALLOC_ANY since it is first time + */ + udb = dslite_user_db_lookup_entry(&u_ki); + if (PREDICT_TRUE(udb)) { + /* + * not first time allocate port for this user + * check limit + */ + if (PREDICT_FALSE(udb->ntranslations >= + dslite_entry_ptr->cnat_main_db_max_ports_per_user)) { + //cnat_main_db_max_ports_per_user)) + + /* Check for the port type here. If we are getting + * a STATIC PORT, allow the config. 
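+     * In other words, only dynamic allocations are rejected once the
+     * per-user limit is reached; static port requests are still honoured.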
+ */ + if (PREDICT_TRUE(port_type != PORT_TYPE_STATIC)) { + info->error = CNAT_OUT_LIMIT; + DSLITE_DEBUG_INSIDE_ERR(CNAT_OUT_LIMIT) + port_exceeded_msg_log(u_ki.dk.ipv4_key.k.ipv4, u_ki.dk.ipv4_key.k.vrf); + nat44_dslite_common_stats[dslite_id].in2out_drops_port_limit_exceeded ++; + u_ki.dk.ipv4_key.k.vrf = ki->dk.ipv4_key.k.vrf; + u_ki.dk.ipv4_key.k.port = ki->dk.ipv4_key.k.port; + handle_dslite_port_exceeded_logging(udb, &u_ki.dk, dslite_entry_ptr); + return (NULL); + } + } + + CHECK_CLEAR_PORT_LIMIT_EXCEED_FLAG(udb, + dslite_entry_ptr->cnat_main_db_max_ports_per_user) + + /* + * check if main db has space to accomodate new entry + */ + h = pool_header(cnat_main_db); + + free_main = vec_len(h->free_indices) - 1; + if (PREDICT_FALSE(!free_main)) { + info->error = CNAT_MAIN_DB_LIMIT; + nat44_dslite_common_stats[dslite_id].in2out_drops_system_limit_reached ++; + DSLITE_DEBUG_INSIDE_ERR(CNAT_MAIN_DB_LIMIT) + + current_timestamp = spp_trace_log_get_unix_time_in_seconds(); + if (PREDICT_FALSE((current_timestamp - last_log_timestamp) > + 1800)) { + spp_printf(CNAT_SESSION_THRESH_EXCEEDED, 0, NULL); + last_log_timestamp = current_timestamp; + } + +#ifdef UT_TEST_CODE + printf("Limit reached : OLD USER"); +#endif + return NULL; + } + + /* + * allocate port, from existing mapping + */ + my_index = udb->portmap_index; + + if (PREDICT_FALSE(port_type == PORT_TYPE_STATIC)) { + rv = cnat_static_port_alloc_v2_bulk(pm, + PORT_ALLOC_DIRECTED, + port_pair_type, + ki->dk.ipv4_key.k.ipv4, + ki->dk.ipv4_key.k.port, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + STAT_PORT_RANGE_FROM_INST_PTR(dslite_entry_ptr) +#ifndef NO_BULK_LOGGING + , udb, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), + &nfv9_log_req +#endif + , my_vrfmap->ip_n_to_1 + ); + } else if (PREDICT_TRUE(port_type != PORT_TYPE_RTSP) ) { + + rv = cnat_dynamic_port_alloc_v2_bulk(pm, + PORT_ALLOC_DIRECTED, + port_pair_type, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + STAT_PORT_RANGE_FROM_INST_PTR(dslite_entry_ptr) +#ifndef NO_BULK_LOGGING + , udb, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), + &nfv9_log_req +#endif + , 0, + &(dslite_entry_ptr->rseed_ip) + ); + DSLITE_PRINTF(1,"D_PORT_ALLOC %x %u\n", ko.k.k.ipv4, ko.k.k.port); + } else { + /* + * For RTSP, two translation entries are created, + * check if main db has space to accomodate two new entry + */ + free_main = free_main - 1; + + if (PREDICT_FALSE(!free_main)) { + info->error = CNAT_MAIN_DB_LIMIT; + nat44_dslite_common_stats[dslite_id].in2out_drops_system_limit_reached ++; + DSLITE_DEBUG_INSIDE_ERR(CNAT_MAIN_DB_LIMIT) + + return NULL; + } else { + + rv = cnat_dynamic_port_alloc_rtsp_bulk(pm, + PORT_ALLOC_DIRECTED, + port_pair_type, + ki->dk.ipv4_key.k.port, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + STAT_PORT_RANGE_FROM_INST_PTR(dslite_entry_ptr) +#ifndef NO_BULK_LOGGING + , udb, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), + &nfv9_log_req +#endif + , &(dslite_entry_ptr->rseed_ip) + ); + } + } + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + DSLITE_PRINTF(1,"D_PORT_ALLOC port alloc error\n"); + info->error = rv; + DSLITE_DEBUG_INSIDE_ERR(rv) + nat44_dslite_common_stats[dslite_id].in2out_drops_resource_depletion ++; + log_port_alloc_error(rv, &(ki->dk.ipv4_key)); + return (NULL); + } + /* + * increment port in use for this user + */ + udb->ntranslations += 1; + } else { + /* + * first time allocate port for this user + */ + + /* + * Do not create entry if port limit is invalid + */ + if (PREDICT_FALSE(!(dslite_entry_ptr->cnat_main_db_max_ports_per_user))) { + if (PREDICT_TRUE(port_type 
!= PORT_TYPE_STATIC)) { + info->error = CNAT_OUT_LIMIT; + nat44_dslite_common_stats[dslite_id].in2out_drops_port_limit_exceeded ++; + port_exceeded_msg_log(u_ki.dk.ipv4_key.k.ipv4, u_ki.dk.ipv4_key.k.vrf); + DSLITE_DEBUG_INSIDE_ERR(CNAT_OUT_LIMIT) + return (NULL); + } + } + + /* + * Check if main db has space for new entry + * Allowing a user db entry to be created if main db is not free + * will cause a port to be allocated to that user, which results in + * wastage of that port, hence the check is done here. + */ + h = pool_header(cnat_main_db); + free_main = vec_len(h->free_indices) - 1; + + h = pool_header(cnat_user_db); + free_user = vec_len(h->free_indices) - 1; + + /* + * If either main_db or user_db does not have entries + * bail out, with appropriate error + */ + if (PREDICT_FALSE(!(free_main && free_user))) { + u32 log_error; + if(free_main) { + info->error = CNAT_USER_DB_LIMIT; + log_error = CNAT_USER_DB_LIMIT_ERROR; + } else { + info->error = CNAT_MAIN_DB_LIMIT; + log_error = CNAT_MAIN_DB_LIMIT_ERROR; + } + nat44_dslite_common_stats[dslite_id].in2out_drops_system_limit_reached ++; + DSLITE_DEBUG_INSIDE_ERR(info->error) + spp_printf(log_error, 0, 0); + return NULL; + } + + if (PREDICT_FALSE(port_type == PORT_TYPE_STATIC)) { + rv = cnat_static_port_alloc_v2_bulk(pm, + PORT_ALLOC_ANY, + port_pair_type, + ki->dk.ipv4_key.k.ipv4, + ki->dk.ipv4_key.k.port, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + STAT_PORT_RANGE_FROM_INST_PTR(dslite_entry_ptr) +#ifndef NO_BULK_LOGGING + , NULL, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), + &nfv9_log_req +#endif + , my_vrfmap->ip_n_to_1 + + ); + } else if (PREDICT_TRUE(port_type != PORT_TYPE_RTSP)) { + rv = cnat_dynamic_port_alloc_v2_bulk(pm, + PORT_ALLOC_ANY, + port_pair_type, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + STAT_PORT_RANGE_FROM_INST_PTR(dslite_entry_ptr) +#ifndef NO_BULK_LOGGING + , NULL, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), + &nfv9_log_req +#endif + , 0, + &(dslite_entry_ptr->rseed_ip) + ); + DSLITE_PRINTF(1,"NU:D PORT ALLOC..%x %u\n", ko.k.k.ipv4, + ko.k.k.port); + + } else { + /* + * For RTSP, two translation entries are created, + * check if main db has space to accomodate two new entry + */ + free_main = free_main - 1; + + if (PREDICT_FALSE(!free_main)) { + info->error = CNAT_MAIN_DB_LIMIT; + nat44_dslite_common_stats[dslite_id].in2out_drops_system_limit_reached ++; + DSLITE_DEBUG_INSIDE_ERR(CNAT_MAIN_DB_LIMIT) + + return NULL; + } else { + + rv = cnat_dynamic_port_alloc_rtsp_bulk(pm, + PORT_ALLOC_DIRECTED, + port_pair_type, + ki->dk.ipv4_key.k.port, + &my_index, + &(ko.k.k.ipv4), + &(ko.k.k.port), + STAT_PORT_RANGE_FROM_INST_PTR(dslite_entry_ptr) +#ifndef NO_BULK_LOGGING + , NULL, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr), + &nfv9_log_req +#endif + , &(dslite_entry_ptr->rseed_ip) + ); + /* TODO: Add the port pair flag here */ + } + } + + + + if (PREDICT_FALSE(rv != CNAT_SUCCESS)) { + DSLITE_PRINTF(1,"NU:D_PORT_ALLOC port alloc error\n"); + info->error = rv; + nat44_dslite_common_stats[dslite_id].in2out_drops_resource_depletion ++; + DSLITE_DEBUG_INSIDE_ERR(rv) + log_port_alloc_error(rv, &(ki->dk.ipv4_key)); + return (NULL); + } + /* + * create entry in user db + */ + udb = dslite_user_db_create_entry(&u_ki, my_index); + nat44_dslite_common_stats[dslite_id].num_subscribers++; + DSLITE_PRINTF(1,"UDB crete entry done..\n"); +#ifndef NO_BULK_LOGGING + if(PREDICT_TRUE(udb && (BULK_ALLOC_NOT_ATTEMPTED != nfv9_log_req))) { + cnat_update_bulk_range_cache(udb, ko.k.k.port, + BULKSIZE_FROM_VRFMAP(dslite_entry_ptr)); + } 
+#endif /* #ifndef NO_BULK_LOGGING */ + } + + /* + * step 3: + * outside port is allocated for this src vrf/src ip addr + * 1)create a new entry in main db + * 2)setup cnat_out2in_hash key + * 3)setup cnat_in2out_hash key + */ + db = dslite_create_main_db_entry_and_hash(ki, &ko, udb); + DSLITE_PRINTF(1,"dslite_create_main_db_entry_and_hash done..\n"); + //db->vrfmap_index = my_vrfmap - cnat_map_by_vrf; + db->dslite_nat44_inst_id = dslite_id; + nat44_dslite_common_stats[dslite_id].active_translations++; + if (PREDICT_FALSE(port_type == PORT_TYPE_STATIC)) { + nat44_dslite_common_stats[dslite_id].num_static_translations++; + } else { + nat44_dslite_common_stats[dslite_id].num_dynamic_translations++; + } + + dslite_translation_create_count++; + + db->dst_ipv4 = dest_info->k.ipv4; + db->dst_port = dest_info->k.port; + if(PREDICT_TRUE(db->dst_ipv4 || db->dst_port)) { + /* for static fwding, let the nsessions remain zero */ + db->nsessions++; + } + + /* + * don't forget logging + * logging API is unconditional, + * logging configuration check is done inside the inline function + */ + if(PREDICT_FALSE(nfv9_log_req != CACHE_ALLOC_NO_LOG_REQUIRED)) { + if(PREDICT_FALSE( dslite_entry_ptr->nf_logging_policy == + SESSION_LOG_ENABLE)) { + if(PREDICT_TRUE(db->dst_ipv4 || db->dst_port)) { + cnat_nfv9_ds_lite_log_session_create(db, + dslite_entry_ptr,NULL); + } + } else { + cnat_nfv9_ds_lite_mapping_create(db,dslite_entry_ptr +#ifndef NO_BULK_LOGGING + ,nfv9_log_req +#endif + ); + } + if(PREDICT_TRUE((dslite_entry_ptr->syslog_logging_policy != SESSION_LOG_ENABLE) || + (db->dst_ipv4 || db->dst_port))) { + cnat_syslog_ds_lite_mapping_create(db,dslite_entry_ptr,NULL +#ifndef NO_BULK_LOGGING + ,nfv9_log_req +#endif + ); + } + } + +#if 0 + if (PREDICT_FALSE(port_pair_type == PORT_PAIR)) { + cnat_main_db_entry_t *db2 = 0; + dslite_db_key_bucket_t new_ki = *ki; + u64 a, b, c; + + new_ki.k.k.port += 1; + ko.k.k.port += 1; + + CNAT_V4_GET_HASH(new_ki.k.key64, new_ki.bucket, + CNAT_MAIN_HASH_MASK); + + db2 = cnat_create_main_db_entry_and_hash(&new_ki, &ko, udb); + + translation_create_count ++; + db2->dslite_nat44_inst_id = dslite_id; + db2->entry_expires = cnat_current_time; + db2->flags |= CNAT_DB_FLAG_ALG_ENTRY; + udb->ntranslations += 1; +#ifndef NO_BULK_LOGGING + if(PREDICT_FALSE(nfv9_log_req == BULK_ALLOC_NOT_ATTEMPTED)) + cnat_nfv9_log_mapping_create(db2, my_vrfmap, nfv9_log_req); +#else + cnat_nfv9_log_mapping_create(db2, my_vrfmap); +#endif + } +#endif + return db; +} +#endif /* TOBE_PORTED */ + +#if 0 +/* TOBE_PORTED */ +uword +cnat_db_v2_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return 0; +} +VLIB_REGISTER_NODE (cnat_db_v2_node) = { + .function = cnat_db_v2_node_fn, + .name = "vcgn-db-v2", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_db_v2_error_strings), + .error_strings = cnat_db_v2_error_strings, + + .n_next_nodes = CNAT_DB_V2_DROP, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_DB_V2_DROP] = "error-drop", + }, +}; +#endif +void cnat_db_v2_init (void) +{ + + u32 i, n; + cnat_timeout_db_entry_t * tdb __attribute__((unused)); + + cgse_nat_db_entry_t *comb_db __attribute__((unused)); + cgse_nat_user_db_entry_t *comb_user __attribute__((unused)); + cgse_nat_session_db_entry_t *comb_session __attribute__((unused)); + + n = CNAT_DB_SIZE*1.15; /* add 15% LB margin */ + + /* + * We also make it multiple of NUM_BITS_IN_UWORD for better + * DB scanning algorithm + */ + if (n % NUM_BITS_IN_UWORD) + n 
+= (NUM_BITS_IN_UWORD - (n % NUM_BITS_IN_UWORD)); + + pool_alloc(cgse_nat_db,n); + for(i=0; i< n; i++) { + pool_get(cgse_nat_db, comb_db); + } + + for(i=0; i< n; i++) { + pool_put(cgse_nat_db, cgse_nat_db + i); + } + + cnat_main_db = &cgse_nat_db->nat44_main_db; + + /* For Sessions */ + if(PLATFORM_DBL_SUPPORT) { + /* create session table for NAT44 and NAT64 itself */ + printf("DBL Support exist %d\n", PLATFORM_DBL_SUPPORT); + n = CNAT_SESSION_DB_SIZE * 1.15; /* add 15% LB margin */ + } else { + /* Create session table for NAT64 only */ + printf("DBL Support Not exist\n"); + n = NAT64_MAIN_DB_SIZE * 1.15; /* add 15% LB margin */ + } + + /* + * We also make it multiple of NUM_BITS_IN_UWORD for better + * DB scanning algorithm + */ + if (n % NUM_BITS_IN_UWORD) + n += (NUM_BITS_IN_UWORD - (n % NUM_BITS_IN_UWORD)); + + pool_alloc(cgse_session_db,n); + for(i=0; i< n; i++) { + pool_get(cgse_session_db, comb_session); + } + + for(i=0; i< n; i++) { + pool_put(cgse_session_db, cgse_session_db + i); + } + + cnat_session_db = &cgse_session_db->nat44_session_db; + + vec_validate(cnat_out2in_hash, CNAT_MAIN_HASH_MASK); + memset(cnat_out2in_hash, 0xff, CNAT_MAIN_HASH_SIZE*sizeof(index_slist_t)); + + vec_validate(cnat_in2out_hash, CNAT_MAIN_HASH_MASK); + memset(cnat_in2out_hash, 0xff, CNAT_MAIN_HASH_SIZE*sizeof(index_slist_t)); + + vec_validate(cnat_session_hash, CNAT_SESSION_HASH_MASK); + memset(cnat_session_hash, 0xff, CNAT_SESSION_HASH_SIZE*sizeof(index_slist_t)); + + n = CNAT_USER_DB_SIZE * 1.15; /* use hash size as db size for LB margin */ + if (n % NUM_BITS_IN_UWORD) + n += (NUM_BITS_IN_UWORD - (n % NUM_BITS_IN_UWORD)); + + pool_alloc(cgse_user_db,n); + for(i=0; i< n; i++) { + pool_get(cgse_user_db, comb_user); + } + + for(i=0; i< n; i++) { + pool_put(cgse_user_db, cgse_user_db + i); + } + + cnat_user_db = &cgse_user_db->nat44_user_db; + + vec_validate(cnat_user_hash, CNAT_USER_HASH_MASK); + memset(cnat_user_hash, 0xff, CNAT_USER_HASH_SIZE*sizeof(index_slist_t)); + + n = CNAT_TIMEOUT_HASH_SIZE; /* use hash size as db size for LB margin */ + for(i=0; i< n; i++) { + pool_get(cnat_timeout_db, tdb); + } + + for(i=0; i< n; i++) { + pool_put(cnat_timeout_db, cnat_timeout_db + i); + } + + vec_validate(cnat_timeout_hash, CNAT_TIMEOUT_HASH_MASK); + memset(cnat_timeout_hash, 0xff, CNAT_TIMEOUT_HASH_SIZE*sizeof(index_slist_t)); + +#ifdef TOBE_PORTED + for (i=0;i<CNAT_MAX_VRFMAP_ENTRIES; i++) { + svi_params_array[i].svi_type = CGSE_SVI_TYPE_INFRA; + } +#endif + cnat_db_init_done = 1; + printf("CNAT DB init is successful\n"); + return; + //return 0; +} diff --git a/vnet/vnet/vcgn/cnat_debug_msg_handler.c b/vnet/vnet/vcgn/cnat_debug_msg_handler.c new file mode 100644 index 00000000000..9c84ac1e856 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_debug_msg_handler.c @@ -0,0 +1,1780 @@ +/* + *------------------------------------------------------------------ + * cnat_debug_msg_handler.c - debug command + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#include "cnat_cli.h" + +u32 global_debug_flag = CNAT_DEBUG_NONE; +u16 debug_i_vrf = CNAT_DEBUG_NONE; +u32 debug_i_flag = CNAT_DEBUG_NONE; +u32 debug_i_addr_start = CNAT_DEBUG_NONE; +u32 debug_i_addr_end = CNAT_DEBUG_NONE; + +u16 debug_o_vrf = CNAT_DEBUG_NONE; +u32 debug_o_flag = CNAT_DEBUG_NONE; +u32 debug_o_addr_start = CNAT_DEBUG_NONE; +u32 debug_o_addr_end = CNAT_DEBUG_NONE; + +u32 udp_inside_checksum_disable = 0; +u32 udp_outside_checksum_disable = 0; +u32 udp_inside_packet_dump_enable = 0; +u32 udp_outside_packet_dump_enable = 0; + +u32 tcp_logging_enable_flag = 0; + +u32 icmp_debug_flag = 0; +u32 frag_debug_flag = 0; + +u32 nfv9_logging_debug_flag = 0; +u32 syslog_debug_flag = 0; + +u32 summary_stats_debug_flag = 0; + +/* + * By defaut we set the config debug level to 1 + */ +u32 config_debug_level = 1; + +#ifdef TOBE_PORTED +extern void show_bulk_port_stats(); +extern void clear_bulk_port_stats(); +extern void show_bulk_port_allocation(u16 in_vrfid, u32 inside_ip); +extern void set_bulk_size_to_all_vrfs(int bulk_size); + +u32 *cnat_debug_addr_list; + +extern int global_pd_dbg_lvl; +extern int global_pi_dbg_lvl; +extern int global_l2_dbg_lvl; +extern u32 cnat_pptp_debug_flag; +extern u32 cnat_pcp_debug_flag; + +void spp_api_cnat_get_cgn_db_summary +(spp_api_cnat_generic_command_request_t *); + +void spp_api_cnat_v4_debug_dummy_t_handler +(spp_api_cnat_v4_debug_dummy_t *mp) +{ + u32 arr[] = { DEBUG_DUMMY }; + spp_printf(CNAT_DUMMY_HANDLER_HIT, 1, arr); + if(global_pd_dbg_lvl) { + PLATFORM_DEBUG_PRINT("\n invalid debug command received: message id is 0\n"); + } + mp->rc = CNAT_ERR_INVALID_MSG_ID; + +} + +void spp_api_cnat_v4_debug_dummy_max_t_handler +(spp_api_cnat_v4_debug_dummy_max_t *mp) +{ + u32 arr[] = { DEBUG_DUMMY_MAX }; + spp_printf(CNAT_DUMMY_HANDLER_HIT, 1, arr); + if(global_pd_dbg_lvl) { + PLATFORM_DEBUG_PRINT("\n invalid debug command received: message id is out of range\n"); + } + mp->rc = CNAT_ERR_INVALID_MSG_ID; + +} + + +void spp_api_cnat_v4_debug_global_t_handler +(spp_api_cnat_v4_debug_global_t *mp) +{ + if ((mp->debug_flag == CNAT_DEBUG_GLOBAL_ERR) || + (mp->debug_flag == CNAT_DEBUG_GLOBAL_ALL) || + (mp->debug_flag == CNAT_DEBUG_NONE)) { + mp->rc = CNAT_SUCCESS; + global_debug_flag = mp->debug_flag; + return; + } + + mp->rc = CNAT_ERR_PARSER; + if(global_pd_dbg_lvl) { + PLATFORM_DEBUG_PRINT("invalid global debug flag %x\n", + mp->debug_flag); + } + return; +} + +void spp_node_print_cnat_counters() +{ + if (cnat_global_counters.nfv9_downstream_constipation_count) { + PLATFORM_DEBUG_PRINT("\nNF downstream constipation count: %llu\n", + cnat_global_counters.nfv9_downstream_constipation_count); + } + + if (xlat_global_counters.v4_to_v6_frag_invalid_uidb_drop_count || + xlat_global_counters.v6_to_v4_frag_invalid_uidb_drop_count || + xlat_global_counters.v4_to_v6_icmp_invalid_uidb_drop_count || + xlat_global_counters.v6_to_v4_icmp_invalid_uidb_drop_count || + xlat_global_counters.v4_to_v6_tcp_invalid_uidb_drop_count || + xlat_global_counters.v6_to_v4_tcp_invalid_uidb_drop_count || + xlat_global_counters.v4_to_v6_udp_invalid_uidb_drop_count || + xlat_global_counters.v6_to_v4_udp_invalid_uidb_drop_count || + xlat_global_counters.v4_to_v6_udp_crc_zero_invalid_uidb_drop_count) { + + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v4_to_v6 frag invalid uidb drop count %lld", + my_instance_number, + xlat_global_counters.v4_to_v6_frag_invalid_uidb_drop_count); 
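+        /* The remaining per-instance XLAT invalid-uidb drop counters are
+         * printed below; this block runs only when at least one of the
+         * counters checked above is non-zero.
+         */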
+ + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v6_to_v4 frag invalid uidb drop count %lld", + my_instance_number, + xlat_global_counters.v6_to_v4_frag_invalid_uidb_drop_count); + + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v4_to_v6 icmp invalid uidb drop count %lld", + my_instance_number, + xlat_global_counters.v4_to_v6_icmp_invalid_uidb_drop_count); + + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v6_to_v4 icmp invalid uidb drop count %lld", + my_instance_number, + xlat_global_counters.v6_to_v4_icmp_invalid_uidb_drop_count); + + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v4_to_v6 tcp invalid uidb drop count %lld", + my_instance_number, + xlat_global_counters.v4_to_v6_tcp_invalid_uidb_drop_count); + + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v6_to_v4 tcp invalid uidb drop count %lld", + my_instance_number, + xlat_global_counters.v6_to_v4_tcp_invalid_uidb_drop_count); + + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v4_to_v6 udp invalid uidb drop count %lld", + my_instance_number, + xlat_global_counters.v4_to_v6_udp_invalid_uidb_drop_count); + + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v6_to_v4 udp invalid uidb drop count %lld", + my_instance_number, + xlat_global_counters.v6_to_v4_udp_invalid_uidb_drop_count); + + PLATFORM_DEBUG_PRINT("\nMy_instance %d: v4_to_v6 udp crc0 invld uidb drop count %lld", + my_instance_number, + xlat_global_counters.v4_to_v6_udp_crc_zero_invalid_uidb_drop_count); + + PLATFORM_DEBUG_PRINT("\n"); + } + + +} + +void spp_log_p2mp_req(spp_api_cnat_p2mp_debug_request_t *mp) +{ + u8 i = 0; + u32 num_rec = spp_net_to_host_byte_order_32(&mp->param[i++]); + u32 err_c_num_args; + + while (num_rec--) { + u8 j = 0; + u16 err_c; + u16 num_args; + u32 argv[32]; + + err_c_num_args = spp_net_to_host_byte_order_32(&mp->param[i++]); + err_c = (err_c_num_args >> 16) & 0xFFFF; + num_args = err_c_num_args & 0xFFFF; + + num_args = (num_args <= 32) ? num_args : 32; + while (j < num_args) { + argv[j++] = spp_net_to_host_byte_order_32(&mp->param[i++]); + } + + i += ((num_args - 32) > 0) ? 
(num_args - 32) : 0; + spp_printf(err_c, num_args, argv); + } +} + +void nat64_debug_addr_pool_add_del() +{ + cnat_portmap_v2_t *my_pm = NULL; + cnat_portmap_v2_t *pm = NULL; + u32 len, i, pm_len; + + PLATFORM_DEBUG_PRINT("\n sizeof port_map =%d\n", sizeof( cnat_portmap_v2_t)); + len = 10; + PLATFORM_DEBUG_PRINT("\n adding 10 entries in vector 1-10\n "); + vec_add2(pm, my_pm, len); + pm = my_pm; + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + for(i=0;i<len;i++){ + my_pm->ipv4_address = i+1; + my_pm++; + } + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + + pm_len = vec_len(pm); + PLATFORM_DEBUG_PRINT("\n printing vector contents : vec_len = %d \n", pm_len); + my_pm = pm; + for(i=0;i<pm_len ; i++) + { + PLATFORM_DEBUG_PRINT(" %d ,",my_pm->ipv4_address); + my_pm++; + } + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + + PLATFORM_DEBUG_PRINT("\n adding 5 entries in vector 11-15\n "); + len = 5; + vec_add2(pm, my_pm, len); + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + for(i=0;i<len;i++) { + my_pm->ipv4_address = 11+i; + my_pm++; + } + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + pm_len = vec_len(pm); + PLATFORM_DEBUG_PRINT("\n printing vector contents : vec_len = %d \n", pm_len); + my_pm = pm; + for(i=0;i<pm_len ; i++) + { + PLATFORM_DEBUG_PRINT(" %d ,",my_pm->ipv4_address); + my_pm++; + } + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + + PLATFORM_DEBUG_PRINT("\n adding 6 entries in vector 16-21\n "); + len = 6; + vec_add2(pm, my_pm, len); + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + for(i=0;i<len;i++) { + my_pm->ipv4_address = 16+i; + my_pm++; + } + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + pm_len = vec_len(pm); + PLATFORM_DEBUG_PRINT("\n printing vector contents : vec_len = %d \n", pm_len); + my_pm = pm; + for(i=0;i<pm_len ; i++) + { + PLATFORM_DEBUG_PRINT(" %d ,",my_pm->ipv4_address); + my_pm++; + } + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + PLATFORM_DEBUG_PRINT("\nDeleting 7 entries starting from entry value=8\n"); + pm_len = vec_len(pm); + my_pm = pm; + PLATFORM_DEBUG_PRINT(" pm_len =%d\n", pm_len); + for(i=0;i<pm_len;i++) + { + if(my_pm->ipv4_address == 8){ + PLATFORM_DEBUG_PRINT("\n match found brraeaking..\n"); + break; + } + my_pm++; + } + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p i= %d\n", pm, my_pm, i); +// vec_delete(pm, 7, my_pm); + vec_delete(pm, 7, i); + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + + PLATFORM_DEBUG_PRINT(" printing entries aftr deletion from 8-14\n"); + pm_len = vec_len(pm); + PLATFORM_DEBUG_PRINT("\n printing vector contents : vec_len = %d \n", pm_len); + my_pm = pm; + for(i=0;i<pm_len ; i++) + { + PLATFORM_DEBUG_PRINT(" %d ,",my_pm->ipv4_address); + my_pm++; + } + + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + + PLATFORM_DEBUG_PRINT("\nadding deleted items again 8-14\n"); + len =7; + vec_add2(pm, my_pm, len); + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + for(i=0;i<len;i++) { + my_pm->ipv4_address = 8+i; + my_pm++; + } + + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + pm_len = vec_len(pm); + PLATFORM_DEBUG_PRINT("\n printing vector contents : vec_len = %d \n", pm_len); + my_pm = pm; + for(i=0;i<pm_len ; i++) + { + PLATFORM_DEBUG_PRINT(" %d ,",my_pm->ipv4_address); + my_pm++; + } + PLATFORM_DEBUG_PRINT(" pm =%p , my_pm = %p\n", pm, my_pm); + PLATFORM_DEBUG_PRINT("\n"); +} + + +void uidb_mapping_dump_timeout() { + + u32 i; + + 
PLATFORM_DEBUG_PRINT("\nCGSE uidb mapping table \n"); + for(i = 0;i < 30;i++) { + PLATFORM_DEBUG_PRINT("%d ",*(cgse_uidb_index_cgse_id_mapping_ptr + i)); + } + +} + +void nat64_debug_dump_info(u32 debug_value) +{ + + switch(debug_value) { + + case 1 : + bib_add_v6_entry1(); + break; + + case 2 : + bib_add_v6_entry2(); + break; + + case 3 : + bib_add_v6_entry1_new(); + break; + + case 4 : + bib_add_v6_entry1_new_static(); + break; + + case 5 : + bib_add_v6_entry3(); + break; + + case 6 : + bib_add_v6_entry_new2(); + break; + + case 7 : + nat64_fill_table_entry(); + break; + + case 10 : + nat64_db_dump_main(); + break; + + case 11 : + nat64_db_dump_user(); + break; + + case 12 : + nat64_db_dump_session(); + break; + + case 13 : + nat64_dump_table(); + break; + + case 14 : + bib_del_v6_entry1_static(); + break; + + case 15 : + nat64_debug_addr_pool_add_del(); + break; + + case 16 : + nat64_db_dump_timeout(0); + break; + + case 17 : + uidb_mapping_dump_timeout(); + break; + + default : break; + } +} + + +void cnat_debug_flags_set (spp_api_cnat_p2mp_debug_request_t *mp) +{ + u32 debug_variable = spp_net_to_host_byte_order_32(&mp->param[0]); + u32 debug_value = spp_net_to_host_byte_order_32(&mp->param[1]); + + cnat_key_t t_key; + + switch (debug_variable) { + + case CNAT_DEBUG_FLAG_UDP_INSIDE_CHECKSUM_DISABLE: + udp_inside_checksum_disable = debug_value; + PLATFORM_DEBUG_PRINT("\nudp_inside_checksum_disable set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_UDP_OUTSIDE_CHECKSUM_DISABLE: + udp_outside_checksum_disable = debug_value; + PLATFORM_DEBUG_PRINT("\nudp_outside_checksum_disable set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_UDP_OUTSIDE_PKT_DUMP_ENABLE: + udp_outside_packet_dump_enable = debug_value; + PLATFORM_DEBUG_PRINT("\nudp_outside_packet_dump_enable set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_UDP_INSIDE_PKT_DUMP_ENABLE: + udp_inside_packet_dump_enable = debug_value; + PLATFORM_DEBUG_PRINT("\nudp_inside_packet_dump_enable set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_ICMP_PKT_DUMP_ENABLE: + icmp_debug_flag = debug_value; + PLATFORM_DEBUG_PRINT("\nicmp_debug_flag set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_FRAG_PKT_DUMP_ENABLE: + frag_debug_flag = debug_value; + PLATFORM_DEBUG_PRINT("\nfrag_debug_flag set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_XLAT_CONFIG_DEBUG_ENABLE: + xlat_config_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nxlat_config_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_NAT64_CONFIG_DEBUG_ENABLE: + nat64_config_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nnat64_config_debug_level set to %d\n", debug_value); + nat64_debug_dump_info(debug_value); + break; + + case CNAT_DEBUG_FLAG_NAT64_DATA_PATH_DEBUG_ENABLE: + nat64_data_path_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nnat64_data_path_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_DSLITE_CONFIG_DEBUG_ENABLE: + ds_lite_config_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nds_lite_config_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_XLAT_DATA_PATH_DEBUG_ENABLE: + xlat_data_path_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nxlat_data_path_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_CONFIG_DEBUG_ENABLE: + config_debug_level = debug_value; + + PLATFORM_DEBUG_PRINT("\nconfig_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_CONFIG_PPTP_ENABLE: + 
cnat_pptp_debug_flag = debug_value; + + if(debug_value == 0) { + pptp_dump_counters(); + } + + PLATFORM_DEBUG_PRINT("\ncnat_pptp_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_CONFIG_PCP_ENABLE: + cnat_pcp_debug_flag = debug_value; + + if(debug_value == 0) { + pcp_dump_counters(); + } + PLATFORM_DEBUG_PRINT("\ncnat_pcp_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_GLOBAL_DEBUG_ALL_ENABLE: + global_debug_flag = debug_value; + PLATFORM_DEBUG_PRINT("\nglobal_debug_flag set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_SUMMARY_STATS_DEBUG_ENABLE: + summary_stats_debug_flag = debug_value; + PLATFORM_DEBUG_PRINT("\nsummary_stats_debug_flag set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_SHOW_DEBUG_ENABLE: + show_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nshow_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_TCP_LOGGING_ENABLE: + tcp_debug_logging_enable_disable(debug_value); + break; + case CNAT_DEBUG_FLAG_V6RD_DATA_PATH_DEBUG_ENABLE: + v6rd_data_path_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nv6rd_data_path_debug_level set to %d\n", debug_value); + break; + case CNAT_DEBUG_FLAG_V6RD_CONFIG_DEBUG_ENABLE: + v6rd_config_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nv6rd_config_debug_level set to %d\n", debug_value); + break; + case CNAT_DEBUG_FLAG_V6RD_DEFRAG_DEBUG_ENABLE: + /* set debug atleast to 1, so that critical errors are always + * enabled + */ + v6rd_defrag_debug_level = debug_value ? debug_value : 1; + PLATFORM_DEBUG_PRINT("\nv6rd_config_debug_level set to %d\n", debug_value); + break; + + + case CNAT_DEBUG_SET_STATIC_PORT_RANGE: + PLATFORM_DEBUG_PRINT("\nChange Static Port Range from %d --> %d\n", + cnat_static_port_range, debug_value); + cnat_static_port_range = debug_value; + break; + + case CNAT_DEBUG_FLAG_DSLITE_DP_ENABLE: + PLATFORM_DEBUG_PRINT("\n Changing dslite debug flag from %d --> %d\n", + dslite_debug_level, debug_value); + dslite_debug_level = debug_value; + break; + + case CNAT_DEBUG_FLAG_NFV9_LOGGING_DUMP_ENABLE: + nfv9_logging_debug_flag = debug_value; + PLATFORM_DEBUG_PRINT("\nnfv9_logging_debug_flag set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_SYSLOG_LOGGING_DUMP_ENABLE: + syslog_debug_flag = debug_value; + PLATFORM_DEBUG_PRINT("\nsyslog_debug_flag set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_MAPE_CONFIG_DEBUG_ENABLE: + mape_config_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nmape_config_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAG_MAPE_DATA_PATH_DEBUG_ENABLE: + mape_data_path_debug_level = debug_value; + PLATFORM_DEBUG_PRINT("\nmape_data_path_debug_level set to %d\n", debug_value); + break; + + case CNAT_DEBUG_FLAGS_DUMP: + default: + { + PLATFORM_DEBUG_PRINT("\nCurrent values of Debug Variables\n"); + PLATFORM_DEBUG_PRINT("\nTo modify an item chose its index and provide the value\n"); + PLATFORM_DEBUG_PRINT("\n%d: udp_inside_checksum_disable %d\n", + CNAT_DEBUG_FLAG_UDP_INSIDE_CHECKSUM_DISABLE, + udp_inside_checksum_disable); + PLATFORM_DEBUG_PRINT("%d: udp_outside_checksum_disable %d\n", + CNAT_DEBUG_FLAG_UDP_OUTSIDE_CHECKSUM_DISABLE, + udp_outside_checksum_disable); + PLATFORM_DEBUG_PRINT("%d: udp_inside_packet_dump_enable %d\n", + CNAT_DEBUG_FLAG_UDP_OUTSIDE_PKT_DUMP_ENABLE, + udp_inside_packet_dump_enable); + PLATFORM_DEBUG_PRINT("%d: udp_outside_packet_dump_enable %d\n", + CNAT_DEBUG_FLAG_UDP_INSIDE_PKT_DUMP_ENABLE, + udp_outside_packet_dump_enable); + 
PLATFORM_DEBUG_PRINT("%d: icmp_debug_flag %d\n", + CNAT_DEBUG_FLAG_ICMP_PKT_DUMP_ENABLE, + icmp_debug_flag); + PLATFORM_DEBUG_PRINT("%d: frag_debug_flag %d\n", + CNAT_DEBUG_FLAG_FRAG_PKT_DUMP_ENABLE, + frag_debug_flag); + PLATFORM_DEBUG_PRINT("%d: config_debug_level %d\n", + CNAT_DEBUG_FLAG_CONFIG_DEBUG_ENABLE, + config_debug_level); + PLATFORM_DEBUG_PRINT("%d: global_debug_flag %d\n", + CNAT_DEBUG_FLAG_GLOBAL_DEBUG_ALL_ENABLE, + global_debug_flag); + PLATFORM_DEBUG_PRINT("%d: summary_stats_debug_flag %d\n", + CNAT_DEBUG_FLAG_SUMMARY_STATS_DEBUG_ENABLE, + summary_stats_debug_flag); + PLATFORM_DEBUG_PRINT("%d: show_debug_level %d\n", + CNAT_DEBUG_FLAG_SHOW_DEBUG_ENABLE, + show_debug_level); + PLATFORM_DEBUG_PRINT("%d: xlat_config_debug_level %d\n", + CNAT_DEBUG_FLAG_XLAT_CONFIG_DEBUG_ENABLE, + xlat_config_debug_level); + PLATFORM_DEBUG_PRINT("%d: xlat_data_path_debug_level %d\n", + CNAT_DEBUG_FLAG_XLAT_DATA_PATH_DEBUG_ENABLE, + xlat_data_path_debug_level); + PLATFORM_DEBUG_PRINT("%d: tcp_logging_enable_flag %d\n", + CNAT_DEBUG_FLAG_TCP_LOGGING_ENABLE, + tcp_logging_enable_flag); + PLATFORM_DEBUG_PRINT(" tcp_logging_enable_options DISABLE %d, ENABLE %d, PKT_DUMP %d, SUMMARY_DUMP %d\n", + TCP_LOGGING_DISABLE, TCP_LOGGING_ENABLE, + TCP_LOGGING_PACKET_DUMP, TCP_LOGGING_SUMMARY_DUMP); + PLATFORM_DEBUG_PRINT("%d: nfv9_logging_debug_flag %d\n", + CNAT_DEBUG_FLAG_NFV9_LOGGING_DUMP_ENABLE, + nfv9_logging_debug_flag); + PLATFORM_DEBUG_PRINT("%d: syslog_debug_flag %d\n", + CNAT_DEBUG_FLAG_SYSLOG_LOGGING_DUMP_ENABLE, + syslog_debug_flag); + PLATFORM_DEBUG_PRINT("%d: cnat_static_port_range %d\n", + CNAT_DEBUG_SET_STATIC_PORT_RANGE, + cnat_static_port_range); + PLATFORM_DEBUG_PRINT("%d: v6rd_data_path_debug_level %d\n", + CNAT_DEBUG_FLAG_V6RD_DATA_PATH_DEBUG_ENABLE, + v6rd_data_path_debug_level); + PLATFORM_DEBUG_PRINT("%d: v6rd_config_debug_level %d\n", + CNAT_DEBUG_FLAG_V6RD_CONFIG_DEBUG_ENABLE, + v6rd_config_debug_level); + PLATFORM_DEBUG_PRINT("%d: v6rd_defrag_debug_level %d\n", + CNAT_DEBUG_FLAG_V6RD_DEFRAG_DEBUG_ENABLE, + v6rd_defrag_debug_level); + PLATFORM_DEBUG_PRINT("%d: nat64_stful_debug %d\n", + CNAT_DEBUG_FLAG_NAT64_CONFIG_DEBUG_ENABLE, + nat64_config_debug_level); + PLATFORM_DEBUG_PRINT("%d: nat64_data_path_debug_level %d\n", + CNAT_DEBUG_FLAG_NAT64_DATA_PATH_DEBUG_ENABLE, + nat64_data_path_debug_level); + PLATFORM_DEBUG_PRINT("%d: dslite_debug_level %d\n", + CNAT_DEBUG_FLAG_DSLITE_DP_ENABLE, + dslite_debug_level); + PLATFORM_DEBUG_PRINT("%d: ds_lite_config_debug_level %d\n", + CNAT_DEBUG_FLAG_DSLITE_CONFIG_DEBUG_ENABLE, + ds_lite_config_debug_level); + PLATFORM_DEBUG_PRINT("%d: mape_config_debug_level %d\n", + CNAT_DEBUG_FLAG_MAPE_CONFIG_DEBUG_ENABLE, + mape_config_debug_level); + PLATFORM_DEBUG_PRINT("%d: mape_data_path_debug_level %d\n", + CNAT_DEBUG_FLAG_MAPE_DATA_PATH_DEBUG_ENABLE, + mape_data_path_debug_level); + } + break; + } +} + +extern void dump_cnat_frag_stats(void); + +void spp_api_cnat_p2mp_debug_request_t_handler +(spp_api_cnat_p2mp_debug_request_t *mp) +{ + u16 command_type; + +/* + if (mp->core_num != my_instance_number) { + mp->rc = CNAT_NOT_THIS_CORE; + return; + } +*/ + + command_type = spp_net_to_host_byte_order_16(&mp->dump_type); + PLATFORM_DEBUG_PRINT("-->> Core%d: Received debug msg ... 
cmd type: %d\n", + my_instance_number, command_type); + + switch (command_type) { + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_POLICY: + PLATFORM_DEBUG_PRINT("Core%d: policy\n", my_instance_number); + cnat_db_dump_policy(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_MAIN_DB: + PLATFORM_DEBUG_PRINT("Core%d: Main db\n", my_instance_number); + cnat_db_dump_main(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_MAIN_DB_SUMMARY: + PLATFORM_DEBUG_PRINT("Core%d: Main db Summary\n", my_instance_number); + cnat_db_dump_main_summary(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_USER_DB: + PLATFORM_DEBUG_PRINT("Core%d: User db\n", my_instance_number); + cnat_db_dump_user(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_USER_DB_SUMMARY: + PLATFORM_DEBUG_PRINT("Core%d: User db Summary\n", my_instance_number); + cnat_db_dump_user_summary(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_HASHES_DB: + PLATFORM_DEBUG_PRINT("Core%d: Hashes db\n", my_instance_number); + cnat_db_dump_hashes(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_VRF_MAP: + PLATFORM_DEBUG_PRINT("Core%d: Vrf map \n", my_instance_number); + cnat_db_dump_portmaps(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_SUMMARY_DB: + PLATFORM_DEBUG_PRINT("Core%d: dump summary DB \n", my_instance_number); + cnat_db_summary(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_STATS: + PLATFORM_DEBUG_PRINT("Core%d: dump stats \n", my_instance_number); + spp_node_print_stats(1, NULL); + break; + + /* Currently does same as clear node ctr, may change */ + case CNAT_DEBUG_GENERIC_COMMAND_CLEAR_STATS: + PLATFORM_DEBUG_PRINT("Core%d: clear stats \n", my_instance_number); + spp_node_clear_stats(); + break; + + case CNAT_DEBUG_SPP_LOG: + PLATFORM_DEBUG_PRINT("Core%d: SPP LOG \n", my_instance_number); + spp_log_p2mp_req(mp); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_NODE_COUNTER: + PLATFORM_DEBUG_PRINT("Core%d: NODE Counter dump \n", my_instance_number); + spp_node_print_counters(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_CLEAR_NODE_COUNTER: + PLATFORM_DEBUG_PRINT("Core%d: clear node counter \n", my_instance_number); + spp_node_clear_stats(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_CNAT_COUNTER: + PLATFORM_DEBUG_PRINT("Core%d: CNAT Counter dump \n", my_instance_number); + spp_node_print_cnat_counters(); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_VA: + PLATFORM_DEBUG_PRINT("Core%d: VA dump \n", my_instance_number); + { + int argc = 1; + u32 arg[2] = {spp_net_to_host_byte_order_32(&mp->param[0]), 0}; + + cnat_va_dump(argc, arg); + } + break; + + case CNAT_DEBUG_GENERIC_COMMAND_SHOW_CONFIG: + PLATFORM_DEBUG_PRINT("Core%d: Show config dump \n", my_instance_number); + { + int argc = 0; + unsigned long arg[3]; + + if (arg[argc++] = spp_net_to_host_byte_order_32(&mp->param[0])) { + if (arg[argc++] = spp_net_to_host_byte_order_32(&mp->param[1])) { + ; + } else { + argc--; + } + } + + cnat_show_cdb_command_v2(argc, arg); +/* + xlat_show_config(); + cnat_alg_show(); +*/ + v6rd_show_config(); + dslite_show_config(); + nat64_dump_table(); + mape_show_config(); + } + break; + + case CNAT_DEBUG_GENERIC_COMMAND_SHOW_NFV9: + PLATFORM_DEBUG_PRINT("Core%d: NFv9 dump \n", my_instance_number); + #if 0 /* Currently not calling this */ + cnat_nfv9_show_cmd(); + #endif + break; + + case CNAT_DEBUG_GENERIC_COMMAND_SHOW_IVRF: + PLATFORM_DEBUG_PRINT("Core%d: IVRF dump \n", my_instance_number); + { + int argc = 0; + unsigned long arg[3]; + + if (arg[argc++] = spp_net_to_host_byte_order_32(&mp->param[0])) { + if (arg[argc++] = 
spp_net_to_host_byte_order_32(&mp->param[1])) { + if (arg[argc++] = spp_net_to_host_byte_order_32(&mp->param[2])) { + ; + } else { + argc--; + } + } else { + argc--; + } + } + + + PLATFORM_DEBUG_PRINT("VRF: %d \n", spp_net_to_host_byte_order_32(&mp->param[0])); + PLATFORM_DEBUG_PRINT("2nd arg: %d \n", + spp_net_to_host_byte_order_32(&mp->param[1])); + + cnat_show_ivrf_command_v2(argc, arg); + } + break; + + case CNAT_DEBUG_GENERIC_COMMAND_SHOW_OVRF: + PLATFORM_DEBUG_PRINT("Core%d: OVRF dump \n", my_instance_number); + { + int argc = 0; + unsigned long arg[3]; + if (arg[argc++] = spp_net_to_host_byte_order_32(&mp->param[0])) { + if (arg[argc++] = spp_net_to_host_byte_order_32(&mp->param[1])) { + if (arg[argc++] = spp_net_to_host_byte_order_32(&mp->param[2])) { + ; + } else { + argc--; + } + } else { + argc--; + } + } + + PLATFORM_DEBUG_PRINT("VRF: %d \n", spp_net_to_host_byte_order_32(&mp->param[0])); + PLATFORM_DEBUG_PRINT("2nd arg: %d \n", + spp_net_to_host_byte_order_32(&mp->param[1])); + + cnat_show_ovrf_command_v2(argc, arg); + } + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DEBUG_OPTIONS: + PLATFORM_DEBUG_PRINT("Core%d: Debug option dump \n", my_instance_number); + { + global_pd_dbg_lvl = 0; + global_pi_dbg_lvl = 0; + global_l2_dbg_lvl = 0; + + global_pd_dbg_lvl = + spp_net_to_host_byte_order_32(&mp->param[0]); + global_pi_dbg_lvl = + spp_net_to_host_byte_order_32(&mp->param[1]); + global_l2_dbg_lvl = + spp_net_to_host_byte_order_32(&mp->param[2]); + + PLATFORM_DEBUG_PRINT("global_pd_dbg_lvl: %d, global_pi_dbg_lvl: %d, global_l2_dbg_lvl: %d\n", + global_pd_dbg_lvl, global_pi_dbg_lvl, global_l2_dbg_lvl); + } + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DUMP_DEBUG_LEVELS: + PLATFORM_DEBUG_PRINT("Core%d: PD Debug level: %d \n", my_instance_number, global_pd_dbg_lvl); + PLATFORM_DEBUG_PRINT("Core%d: PI Debug level: %d \n", my_instance_number, global_pi_dbg_lvl); + PLATFORM_DEBUG_PRINT("Core%d: L2 Debug level: %d \n", my_instance_number, global_l2_dbg_lvl); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DEBUG_FLAGS: + PLATFORM_DEBUG_PRINT("Core%d: Debug flags \n", my_instance_number); + cnat_debug_flags_set(mp); + break; + + case CNAT_READ_TEMP_SENSORS: + PLATFORM_INIT_TEMP_SENSORS(); + PLATFORM_READ_CPU_SENSORS(TEMPERATURE_SENSOR_TEST_MODE); + break; + + case CNAT_BLOCK_OCTEON_SENSOR_READ: + + PLATFORM_SET_TEMP_READ_BLOCK(temperature_read_blocked , mp); +#ifdef TARGET_RODDICK + temperature_read_blocked = + spp_net_to_host_byte_order_32(&mp->param[0]); +#endif + break; + + case CNAT_DEBUG_TIMEOUT_DB_SUMMARY: + cnat_db_dump_timeout(); + break; + + /* This option has to be removed later */ + case CNAT_DEBUG_SET_BULK_SIZE: + PLATFORM_DEBUG_PRINT("\nSetting bulk size to %d\n", + spp_net_to_host_byte_order_32(&mp->param[0])); + set_bulk_size_to_all_vrfs( + spp_net_to_host_byte_order_32(&mp->param[0])); + break; + + case CNAT_DEBUG_SHOW_BULK_STAT: + show_bulk_port_stats(); + break; + + case CNAT_DEBUG_CLEAR_BULK_STAT: + clear_bulk_port_stats(); + break; + + case CNAT_DEBUG_SHOW_BULK_ALLOC: + { + u16 in_vrfid = spp_net_to_host_byte_order_32(&mp->param[0]); + u32 inside_ip = spp_net_to_host_byte_order_32(&mp->param[1]); + show_bulk_port_allocation(in_vrfid, inside_ip); + } + break; + + case CNAT_DEBUG_NAT44_IN2OUT_FRAG_STATS: + dump_cnat_frag_stats(); + break; + + default: + mp->rc = CNAT_ERR_INVALID_MSG_ID; + break; + } + + mp->rc = CNAT_SUCCESS; + return; +} + + +void spp_api_cnat_v4_debug_in2out_private_addr_t_handler +(spp_api_cnat_v4_debug_in2out_private_addr_t *mp) +{ + u16 i_vrf; + u32 
debug_flag; + u32 start_addr, end_addr; + + + start_addr = + spp_net_to_host_byte_order_32(&mp->start_addr); + end_addr = + spp_net_to_host_byte_order_32(&mp->end_addr); + i_vrf = + spp_net_to_host_byte_order_16(&mp->i_vrf); + debug_flag = + spp_net_to_host_byte_order_32(&mp->debug_flag); + + if ((i_vrf > MAX_UIDX) || (start_addr > end_addr) || + ((debug_flag != CNAT_DEBUG_NONE) && + ((debug_flag & CNAT_DEBUG_ALL) == CNAT_DEBUG_NONE))) { + mp->rc = CNAT_ERR_PARSER; + PLATFORM_DEBUG_PRINT("invalid debug ivrf 0x%x flag 0x%x " + "start addr 0x%x end addr 0x%x\n", + i_vrf, debug_flag, + start_addr, end_addr); + return; + } + + PLATFORM_DEBUG_PRINT("debug ivrf 0x%x flag 0x%x " + "start addr 0x%x end addr 0x%x\n", + i_vrf, debug_flag, + start_addr, end_addr); + + mp->rc = CNAT_SUCCESS; + debug_i_vrf = i_vrf; + debug_i_flag = debug_flag; + debug_i_addr_start = start_addr; + debug_i_addr_end = end_addr; + +} + +void spp_api_cnat_v4_debug_out2in_public_addr_t_handler +(spp_api_cnat_v4_debug_out2in_public_addr_t *mp) +{ + u16 o_vrf; + u32 debug_flag; + u32 start_addr, end_addr; + + start_addr = + spp_net_to_host_byte_order_32(&mp->start_addr); + end_addr = + spp_net_to_host_byte_order_32(&mp->end_addr); + o_vrf = + spp_net_to_host_byte_order_16(&mp->o_vrf); + debug_flag = + spp_net_to_host_byte_order_32(&mp->debug_flag); + + if ((o_vrf > MAX_UIDX) || (start_addr > end_addr) || + ((debug_flag != CNAT_DEBUG_NONE) && + ((debug_flag & CNAT_DEBUG_ALL) == CNAT_DEBUG_NONE))) { + mp->rc = CNAT_ERR_PARSER; + PLATFORM_DEBUG_PRINT("invalid debug ovrf 0x%x flag 0x%x " + "start addr 0x%x end addr 0x%x\n", + o_vrf, debug_flag, + start_addr, end_addr); + return; + } + + mp->rc = CNAT_SUCCESS; + debug_o_vrf = o_vrf; + debug_o_flag = debug_flag; + debug_o_addr_start = start_addr; + debug_o_addr_end = end_addr; + + PLATFORM_DEBUG_PRINT(" o2i debug currently is not supported\n"); +} + +void nat64_reset_session_expiry(nat64_bib_entry_t *db) +{ + NAT64_STFUL_DEBUG_PRINT(3, " invoking nat64_clean_bib_db_entry\n " ); + nat64_clean_bib_db_entry(db); + NAT64_STFUL_DEBUG_PRINT(3, "done with clean_bib_db_entry\n " ); +} + +void spp_api_nat64_clear_db_request_t_handler +(spp_api_nat64_clear_db_request_t *mp) +{ + u16 port, proto, flag; + u32 index; + u32 i; + nat64_bib_entry_t* db; + nat64_v6_key_t ki; + nat64_table_entry_t *my_nat64_table_db_ptr; + u16 nat64_id; + + NAT64_STFUL_FUNC_ENTER; + NAT64_STFUL_DEBUG_DUMP_MSG(mp); + + nat64_id = spp_net_to_host_byte_order_16(&mp->nat64_id); + my_nat64_table_db_ptr = nat64_table_ptr + nat64_id; + + port = spp_net_to_host_byte_order_16(&mp->port_num); + proto = mp->protocol; + + ki.vrf = nat64_id; + ki.vrf |= ((u16)proto << CNAT_PRO_SHIFT); + + for(i =0 ; i< 4 ; i++) + ki.ipv6[i] = spp_net_to_host_byte_order_32(&mp->ip_addr[i]); + + ki.port = port; + + flag = mp->flags; + + mp->rc = CNAT_SUCCESS; + + NAT64_STFUL_DEBUG_PRINT(3, "\n Nat64_id = %d, port =%d, \ + proto =%d, flags=0x%08X",\ + nat64_id, port, proto, flag); + + NAT64_STFUL_DEBUG_PRINT(3, "\n IPv6 Addr = %08X : %08X: %08X: %08X",\ + ki.ipv6[0], ki.ipv6[1], ki.ipv6[2], ki.ipv6[3]); + + if (flag == CNAT_DB_CLEAR_SPECIFIC) { + NAT64_STFUL_DEBUG_PRINT(3, "\n clear specific \n"); + + db = nat64_bib_db_lookup_entry(&ki); + if (db == NULL) { + NAT64_STFUL_DEBUG_PRINT(3, "\n clear specific - not present\n"); + mp->rc = CNAT_NOT_FOUND_ANY; + return; + } + + if( !(db->flags & CNAT_DB_NAT64_FLAG) || + (db->nat64_inst_id != nat64_id)) + return; + + + nat64_reset_session_expiry(db); + return; + } + + pool_header_t *p = 
pool_header(nat64_bib_db); + + for(index = 0; index < vec_len(nat64_bib_db); index++) { + + /* check is it nat44, if yes skip , do it n nat44 as well */ + + if (PREDICT_FALSE(!clib_bitmap_get(p->free_bitmap, index))) { + db = nat64_bib_db + index; + + if( !(db->flags & CNAT_DB_NAT64_FLAG) || + (db->nat64_inst_id != nat64_id)) + continue; + + if (flag == CNAT_DB_CLEAR_ALL) { + nat64_reset_session_expiry(db); + continue; + } + + if (flag & CNAT_DB_CLEAR_ADDR) { + if ((db->v6_in_key.ipv6[0] != ki.ipv6[0]) || + (db->v6_in_key.ipv6[1] != ki.ipv6[1]) || + (db->v6_in_key.ipv6[2] != ki.ipv6[2]) || + (db->v6_in_key.ipv6[3] != ki.ipv6[3])){ + NAT64_STFUL_DEBUG_PRINT(3, "\n%s:%d\n", __FUNCTION__, \ + __LINE__ ); + continue; + } + } + + if (flag & CNAT_DB_CLEAR_PROTO) { + if (((db->v6_in_key.vrf & CNAT_PRO_MASK) >> CNAT_PRO_SHIFT) + != proto) { + NAT64_STFUL_DEBUG_PRINT(3, "\n%s:%d\n", __FUNCTION__, \ + __LINE__ ); + continue; + } + } + + if (flag & CNAT_DB_CLEAR_PORT) { + if (db->v6_in_key.port != port) { + NAT64_STFUL_DEBUG_PRINT(3, "\n%s:%d\n", __FUNCTION__, \ + __LINE__ ); + continue; + } + } + + NAT64_STFUL_DEBUG_PRINT(3, "\n%s:%d\n", __FUNCTION__, \ + __LINE__ ); + /* + * Delete if the db entry matches and it is not a + */ + nat64_reset_session_expiry(db); + } + } +} + +void inline cnat_clear_session_db(cnat_main_db_entry_t *db) +{ + if(PREDICT_FALSE(db->nsessions > 1)) { + u32 session_index = db->session_head_index; + cnat_session_entry_t *sdb; + do { + sdb = cnat_session_db + session_index; + if(PREDICT_FALSE(!sdb)) { + //TO DO: Debug msg? + break; + } + sdb->entry_expires = 0; + session_index = sdb->main_list.next; + } while(session_index != db->session_head_index + && db->session_head_index != EMPTY); + } + return; +} + +#ifdef CGSE_DS_LITE +extern dslite_table_entry_t dslite_table_array[]; + +void spp_api_ds_lite_clear_db_request_t_handler +(spp_api_ds_lite_clear_db_request_t *mp) +{ + u16 port, proto, flag; + u32 index; + u32 i; + cnat_main_db_entry_t *db; + cnat_user_db_entry_t *udb; + dslite_key_t ki; + dslite_table_entry_t *my_table_db_ptr; + u16 id; + u16 i_vrf; + + + id = spp_net_to_host_byte_order_16(&mp->ds_lite_id); + id = DS_LITE_CONFIG_TO_ARRAY_ID(id); + + my_table_db_ptr = &dslite_table_array[id]; + i_vrf = my_table_db_ptr->i_vrf; + + port = spp_net_to_host_byte_order_16(&mp->port_num); + proto = mp->protocol; + + ki.ipv4_key.k.vrf = i_vrf; + ki.ipv4_key.k.vrf |= ((u16)proto << CNAT_PRO_SHIFT); + + for(i =0 ; i< 4 ; i++) + ki.ipv6[i] = spp_net_to_host_byte_order_32(&mp->ip_addr[i]); + + ki.ipv4_key.k.port = port; + + flag = mp->flags; + + mp->rc = CNAT_SUCCESS; + + DSLITE_PRINTF(3, "\n dslite id = %d, port =%d" + "proto =%d, flags=0x%08X",\ + id, port, proto, flag); + + DSLITE_PRINTF(3, "\n IPv6 Addr = %08X : %08X: %08X: %08X",\ + ki.ipv6[0], ki.ipv6[1], ki.ipv6[2], ki.ipv6[3]); + + if (flag == CNAT_DB_CLEAR_SPECIFIC) { + DSLITE_PRINTF(3, "\n Clear specific NOT supported for DS Lite \n"); + return; + } + + pool_header_t *p = pool_header(cnat_main_db); + + for(index = 0; index < vec_len(cnat_main_db); index++) { + + /* check is it dslite entry, if not skip */ + + if (PREDICT_FALSE(!clib_bitmap_get(p->free_bitmap, index))) { + db = cnat_main_db + index; + + if( !(db->flags & CNAT_DB_DSLITE_FLAG) || + ((db->in2out_key.k.vrf & CNAT_VRF_MASK) != i_vrf) || + (db->flags & CNAT_DB_FLAG_STATIC_PORT)) { + continue; + } + + if (flag == CNAT_DB_CLEAR_ALL) { + + /* + * Make the entry time as very old (0), and wait + * for a timeout to auto-expire the entry. 
+ */ + db->entry_expires = 0; + /* Handle sessions as well.. */ + cnat_clear_session_db(db); + continue; + } + + if (flag & CNAT_DB_CLEAR_ADDR) { + udb = cnat_user_db + db->user_index; + if(PREDICT_FALSE(!udb)) { + continue; + } + if ((udb->ipv6[0] != ki.ipv6[0]) || + (udb->ipv6[1] != ki.ipv6[1]) || + (udb->ipv6[2] != ki.ipv6[2]) || + (udb->ipv6[3] != ki.ipv6[3])) { + continue; + } + } + + if (flag & CNAT_DB_CLEAR_PROTO) { + if (((db->in2out_key.k.vrf & CNAT_PRO_MASK) >> CNAT_PRO_SHIFT) + != proto) { + continue; + } + } + + if (flag & CNAT_DB_CLEAR_PORT) { + if (db->in2out_key.k.port != port) { + continue; + } + } + + /* + * Mark for expiry in the next round of DB scan + */ + db->entry_expires = 0; + /* Handle sessions as well.. */ + cnat_clear_session_db(db); + } + } +} +#endif /* #ifdef CGSE_DS_LITE */ + +void spp_api_cnat_clear_db_request_t_handler +(spp_api_cnat_clear_db_request_t *mp) +{ + u16 i_vrf, port, proto, flag; + u32 ip_addr, index; + u64 a,b,c; + cnat_main_db_entry_t * db; + cnat_db_key_bucket_t ki; + +#if defined(TARGET_LINUX_UDVR) || defined(CNAT_PG) + i_vrf = mp->inside_vrf; + ip_addr = mp->ip_addr; + port = mp->port_num; + proto = mp->protocol; +#else + i_vrf = spp_net_to_host_byte_order_16(&mp->inside_vrf); + ip_addr = spp_net_to_host_byte_order_32(&mp->ip_addr); + port = spp_net_to_host_byte_order_16(&mp->port_num); + proto = spp_net_to_host_byte_order_16(&mp->protocol); +#endif + + + + ki.k.k.vrf = i_vrf; + ki.k.k.vrf |= ((u16)proto << CNAT_PRO_SHIFT); + ki.k.k.ipv4 = ip_addr; + ki.k.k.port = port; + + flag = mp->wildcard; + + mp->rc = CNAT_SUCCESS; + + if (flag == CNAT_DB_CLEAR_SPECIFIC) { + CNAT_V4_GET_HASH(ki.k.key64, + ki.bucket, + CNAT_MAIN_HASH_MASK); + index = cnat_in2out_hash[ki.bucket].next; + if (PREDICT_TRUE(index == EMPTY)) { + mp->rc = CNAT_NOT_FOUND_ANY; + return; + } + + do { + db = cnat_main_db + index; + + /* + * Delete if the db entry matches and it is not a + * STATIC port entry + */ + if ((db->in2out_key.key64 == ki.k.key64) && + !(db->flags & CNAT_DB_FLAG_STATIC_PORT) && + !(db->flags & CNAT_DB_NAT64_FLAG) && + !(db->flags & CNAT_DB_DSLITE_FLAG)) { + + /* + * Make the entry time as very old (0), and wait + * for a timeout to auto-expire the entry. + */ + db->entry_expires = 0; + /* Handle sessions as well.. */ + cnat_clear_session_db(db); + return; + } + index = db->in2out_hash.next; + } while (index != EMPTY); + + mp->rc = CNAT_NOT_FOUND_ANY; + return; + } + + pool_header_t *p = vec_header(cnat_main_db, sizeof(pool_header_t)); + + for(index = 0; index < vec_len(cnat_main_db); index++) { + + if (PREDICT_TRUE(!clib_bitmap_get(p->free_bitmap, index))) { + db = cnat_main_db + index; + + if(PREDICT_FALSE(db->flags & CNAT_DB_NAT64_FLAG)) { + continue; + } + + if(PREDICT_FALSE(db->flags & CNAT_DB_DSLITE_FLAG)) { + continue; + } + + if (flag == CNAT_DB_CLEAR_ALL) { + if (!(db->flags & CNAT_DB_FLAG_STATIC_PORT)) { + db->entry_expires = 0; + /* Handle sessions as well.. 
*/ + cnat_clear_session_db(db); + } + continue; + } + + if (flag & CNAT_DB_CLEAR_VRF) { + if (((db->in2out_key.k.vrf & CNAT_VRF_MASK) != i_vrf)) { + continue; + } + } + + if (flag & CNAT_DB_CLEAR_ADDR) { + if ((db->in2out_key.k.ipv4 != ip_addr)) { + continue; + } + } + + if (flag & CNAT_DB_CLEAR_PROTO) { + if (((db->in2out_key.k.vrf & CNAT_PRO_MASK) >> CNAT_PRO_SHIFT) + != proto) { + continue; + } + } + + if (flag & CNAT_DB_CLEAR_PORT) { + if (db->in2out_key.k.port != port) { + continue; + } + } + + /* + * Delete if the db entry matches and it is not a + * STATIC port entry + */ + if (!(db->flags & CNAT_DB_FLAG_STATIC_PORT)) { + db->entry_expires = 0; + /* Handle sessions as well.. */ + cnat_clear_session_db(db); + } + } + } +} + +void +spp_api_cnat_generic_command_debug (cnat_generic_command_resp *mp_resp) +{ +#ifdef SHOW_DEBUG + u32 i, j; + + i = spp_net_to_host_byte_order_32(&(mp_resp->num_bytes)); + + PLATFORM_DEBUG_PRINT("\nNum_Bytes %d\n", i); + + for (j = 0; j < i; j++) { + PLATFORM_DEBUG_PRINT("0x%02X ", mp_resp->raw_data[j]); + if ((j % 16) == 15) { + PLATFORM_DEBUG_PRINT("\n"); + } + } +#endif +} + +/* + * The following commands implements command to dump the + * user-db information + * port-map information + * for a give user source IP address + * + * The format of the output is: + * Word 0: Address of udb + * Word 1: udb->translation_list_head_index + * Word 2: + * Bytes 0..1: udb->ntranslations + * Bytes 2..2: udb->icmp_msg_coung + * Bytes 3..3: udb->unused + * Word 3: udb->portmap_index + * Word 4: udb->key.k.ipv4 + * Word 5: + * Bytes 0..1: udb->key.k.port = 0 + * Bytes 2..3: udb->key.k.vrf + * Word 6: udb->user_hash + * Word 7: Address of my_pm + * Word 8: my_pm->status + * Word 9: my_pm->inuse + * Word A: my_pm->delete_time + * Word B: my_pm->ipv4_address + */ +void spp_api_cnat_generic_command_user_db_pm +(spp_api_cnat_generic_command_request_t *mp) +{ + u32 i; + cnat_db_key_bucket_t u_ki; + u16 my_vrfmap_index; + u32 *result_array; + cnat_generic_command_resp *mp_resp; + cnat_user_db_entry_t *udb; + cnat_user_db_entry_t *mp_udb; + cnat_vrfmap_t *my_vrfmap; + cnat_portmap_v2_t *pm; + cnat_portmap_v2_t *my_pm; + + /* + * Request structure is used to send the response + */ + mp_resp = (cnat_generic_command_resp *) mp; + + u_ki.k.k.vrf = spp_net_to_host_byte_order_32(&mp->params[1]); + u_ki.k.k.ipv4 = spp_net_to_host_byte_order_32(&mp->params[2]); + u_ki.k.k.port = 0; + + udb = cnat_user_db_lookup_entry(&u_ki); + + if (!udb) { + mp_resp->num_bytes = spp_host_to_net_byte_order_32(0); + goto no_udb_found; + } + + result_array = (u32 *) (&(mp_resp->raw_data[0])); + + i = 0; + result_array[i++] = spp_host_to_net_byte_order_32((u32) udb); + + mp_udb = (cnat_user_db_entry_t *) &(result_array[i]); + + /* + * Align the entry to the next 4 byte boundary + */ + i = i + ((sizeof(cnat_user_db_entry_t)+3)/4); + + /* + * Fill in the UDB information + */ + mp_udb->translation_list_head_index = + spp_host_to_net_byte_order_32(udb->translation_list_head_index); + mp_udb->ntranslations = + spp_host_to_net_byte_order_16(udb->ntranslations); + mp_udb->icmp_msg_count = udb->icmp_msg_count; + mp_udb->flags = udb->flags; + mp_udb->portmap_index = + spp_host_to_net_byte_order_32(udb->portmap_index); + mp_udb->key.k.ipv4 = + spp_host_to_net_byte_order_32(udb->key.k.ipv4); + mp_udb->key.k.port = + spp_host_to_net_byte_order_16(udb->key.k.port); + mp_udb->key.k.vrf = + spp_host_to_net_byte_order_16(udb->key.k.vrf); + mp_udb->user_hash.next = + spp_host_to_net_byte_order_32(udb->user_hash.next); + + 
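The clear-db handler above composes its wildcard flags as a bit mask, so one request can expire every dynamic translation matching a VRF, an address, a protocol or a port. The fragment below is not part of the committed file and only illustrates one such combination; the stack-allocated request and the direct handler call are assumptions made for the sketch.

    /* Illustrative only: expire every non-static UDP translation for inside
     * address 10.1.1.2 in VRF 5 using the wildcard flags of the clear-db
     * handler above. */
    spp_api_cnat_clear_db_request_t req;

    memset(&req, 0, sizeof(req));
    req.inside_vrf = spp_host_to_net_byte_order_16(5);
    req.ip_addr    = spp_host_to_net_byte_order_32(0x0a010102);   /* 10.1.1.2 */
    req.protocol   = spp_host_to_net_byte_order_16(CNAT_UDP);
    req.wildcard   = CNAT_DB_CLEAR_VRF | CNAT_DB_CLEAR_ADDR | CNAT_DB_CLEAR_PROTO;

    spp_api_cnat_clear_db_request_t_handler(&req);
    /* Matching entries get entry_expires = 0 and age out on the next DB scan;
     * static-port, NAT64 and DS-Lite entries are skipped by the handler. */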
my_vrfmap_index = vrf_map_array[u_ki.k.k.vrf]; + my_vrfmap = cnat_map_by_vrf + my_vrfmap_index; + pm = my_vrfmap->portmap_list; + my_pm = pm + udb->portmap_index; + + /* + * Fill in the port_map information + */ + result_array[i++] = spp_host_to_net_byte_order_32((u32) my_pm); + result_array[i++] = spp_host_to_net_byte_order_32(my_pm->inuse); + result_array[i++] = spp_host_to_net_byte_order_32(my_pm->delete_time); + result_array[i++] = spp_host_to_net_byte_order_32(my_pm->ipv4_address); + + mp_resp->num_bytes = spp_host_to_net_byte_order_32(i*4); + +no_udb_found: + spp_api_cnat_generic_command_debug(mp_resp); +} + +/* + * The following commands implements command to dump the + * DB usage stats for + * main-db + * user-db + * in2out hash + * out2in hash + * + * The format of the output is: + * Word 0: Main-DB - Total + * Word 1: Main-DB - Active + * Word 2: Main-DB - Free + * Word 3: User-DB - Total + * Word 4: User-DB - Active + * Word 5: User-DB - Free + * Word 6: Hash In2Out - Size + * Word 7: Hash In2Out - Used + * Word 8: Hash In2Out - Used Percentage + * Word 9: Hash Out2In - Size + * Word A: Hash Out2In - Used + * Word B: Hash Out2In - Used Percentage + */ +void spp_api_cnat_generic_command_db_summary +(spp_api_cnat_generic_command_request_t *mp) +{ + u32 count1, count2, count3; + u32 i = 0; + u32 k = 0; + cnat_generic_command_resp *mp_resp; + u32 *result_array; + + /* + * Request structure is used to send the response + */ + mp_resp = (cnat_generic_command_resp *) mp; + result_array = (u32 *) (&(mp_resp->raw_data[0])); + + /* + * Find entries free and used in main-db + */ + count1 = vec_len(cnat_main_db); + count2 = db_free_entry(cnat_main_db); + count3 = count1 - count2; + + *(result_array + i++) = spp_host_to_net_byte_order_32(count1); + *(result_array + i++) = spp_host_to_net_byte_order_32(count3); + *(result_array + i++) = spp_host_to_net_byte_order_32(count2); + + /* + * Find entries free and used in user-db + */ + count1 = vec_len(cnat_user_db); + count2 = db_free_entry(cnat_user_db); + count3 = count1 - count2; + + *(result_array + i++) = spp_host_to_net_byte_order_32(count1); + *(result_array + i++) = spp_host_to_net_byte_order_32(count3); + *(result_array + i++) = spp_host_to_net_byte_order_32(count2); + + /* + * Find entries used in in2out and out2in hash tables + * and percentage utilization. 
+ */ + count1 = count2 = 0; + for (k = 0; k < CNAT_MAIN_HASH_SIZE; k++) { + if(cnat_in2out_hash[k].next != ~0) count1++; + if(cnat_out2in_hash[k].next != ~0) count2++; + + } + + count3 = count1*100/CNAT_MAIN_HASH_SIZE; + + *(result_array + i++) = spp_host_to_net_byte_order_32(CNAT_MAIN_HASH_SIZE); + *(result_array + i++) = spp_host_to_net_byte_order_32(count1); + *(result_array + i++) = spp_host_to_net_byte_order_32(count3); + + count3 = count2*100/CNAT_MAIN_HASH_SIZE; + + *(result_array + i++) = spp_host_to_net_byte_order_32(CNAT_MAIN_HASH_SIZE); + *(result_array + i++) = spp_host_to_net_byte_order_32(count2); + *(result_array + i++) = spp_host_to_net_byte_order_32(count3); + + mp_resp->num_bytes = spp_host_to_net_byte_order_32(i*4); + + spp_api_cnat_generic_command_debug(mp_resp); +} + +/* + * The following commands implements generic commands such as: + * + * Command 1: + * Reads num_bytes octets from a start_locn + * generic command <core_num> <cmd_type=1> <start_locn> <num_bytes> 0 0 0 0 0 + * + * Command 2: + * Writes upto 8 octets from a start_locn + * generic command <core_num> <cmd_type=2> <start_locn> <num_bytes> 0 0 0 0 0 + * + * Command 3: + * Dump the db summary stats + * generic command <core_num> <cmd_type=3> + * + * Command 4: + * Dump the user db entry + * generic command <core_num> <cmd_type=4> <vrf_id> <src_ip_addr> + * + * The following structures are referenced by this command: + * typedef struct _spp_api_cnat_generic_command_request { + * u16 _spp_msg_id; + * u8 rc; + * u8 core_num; + * u32 params[8]; + * } spp_api_cnat_generic_command_request_t; + * + * typedef struct { + * u16 spp_msg_id; + * u8 rc; + * u8 core; + * u32 num_bytes; + * u8 raw_data[0]; + * } cnat_generic_command_resp; + * + */ +void spp_api_cnat_generic_command_request_t_handler +(spp_api_cnat_generic_command_request_t *mp) +{ + cnat_generic_command_resp *resp_ptr; + u32 command_type, start_locn, num_bytes; + + command_type = spp_net_to_host_byte_order_32(&mp->params[0]); + resp_ptr = (cnat_generic_command_resp *) mp; + + switch (command_type) { + case CNAT_DEBUG_GENERIC_COMMAND_READ_MEM: + start_locn = spp_net_to_host_byte_order_32(&mp->params[1]); + num_bytes = spp_net_to_host_byte_order_32(&mp->params[2]); + memcpy(&(resp_ptr->raw_data[0]), (u8 *) start_locn, num_bytes); + resp_ptr->num_bytes = spp_host_to_net_byte_order_32(num_bytes); + +#ifdef SHOW_DEBUG + { + u32 i; + + for (i = 0; i < num_bytes; i++) { + PLATFORM_DEBUG_PRINT("0x%02X ", resp_ptr->raw_data[i]); + if ((i % 16) == 15) { + PLATFORM_DEBUG_PRINT("\n"); + } + } + } +#endif + break; + + case CNAT_DEBUG_GENERIC_COMMAND_WRITE_MEM: + start_locn = spp_net_to_host_byte_order_32(&mp->params[1]); + num_bytes = spp_net_to_host_byte_order_32(&mp->params[2]); + + if (num_bytes > sizeof(u64)) { + mp->rc = CNAT_ERR_INVALID_MSG_SIZE; + return; + } + + memcpy((u8 *) start_locn, &(mp->params[3]), num_bytes); + resp_ptr->num_bytes = 0; + break; + + case CNAT_DEBUG_GENERIC_COMMAND_DB_SUMMARY: + spp_api_cnat_generic_command_db_summary(mp); + break; + + case CNAT_DEBUG_GENERIC_COMMAND_USER_DB_PM: + spp_api_cnat_generic_command_user_db_pm(mp); + break; + + case CNAT_DEBUG_GET_CGN_DB_SUMMARY: + spp_api_cnat_get_cgn_db_summary(mp); + break; + + default: + mp->rc = CNAT_ERR_INVALID_MSG_ID; + break; + } +} + + +static int cnat_debug_init (void *notused) +{ + spp_msg_api_set_handler(SPP_API_CNAT_V4_DEBUG_DUMMY, + spp_api_cnat_v4_debug_dummy_t_handler); + + spp_msg_api_set_handler(SPP_API_CNAT_V4_DEBUG_DUMMY_MAX, + spp_api_cnat_v4_debug_dummy_max_t_handler); + + 
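Generic command 3 returns the twelve-word database summary laid out in the comment above, with every word converted to network byte order before being written into raw_data. The sketch below is not part of the committed file; it issues that command locally and decodes the first three words. The shared stack buffer and the direct handler call are assumptions made only for illustration, since real requests and responses travel over the SPP message API.

    /* Illustrative only: issue CNAT_DEBUG_GENERIC_COMMAND_DB_SUMMARY and read
     * back main-db total/active/free.  The 256-byte buffer is an assumption;
     * the response is written in place over the request. */
    u32 msg_buf[64];
    spp_api_cnat_generic_command_request_t *req =
        (spp_api_cnat_generic_command_request_t *) msg_buf;
    cnat_generic_command_resp *resp = (cnat_generic_command_resp *) msg_buf;
    u32 *w;

    memset(msg_buf, 0, sizeof(msg_buf));
    req->params[0] = spp_host_to_net_byte_order_32(CNAT_DEBUG_GENERIC_COMMAND_DB_SUMMARY);
    spp_api_cnat_generic_command_request_t_handler(req);

    w = (u32 *) resp->raw_data;
    PLATFORM_DEBUG_PRINT("main-db total %u active %u free %u\n",
                         spp_net_to_host_byte_order_32(&w[0]),
                         spp_net_to_host_byte_order_32(&w[1]),
                         spp_net_to_host_byte_order_32(&w[2]));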
spp_msg_api_set_handler(SPP_API_CNAT_V4_DEBUG_GLOBAL, + spp_api_cnat_v4_debug_global_t_handler); + + spp_msg_api_set_handler(SPP_API_CNAT_V4_DEBUG_IN2OUT_PRIVATE_ADDR, + spp_api_cnat_v4_debug_in2out_private_addr_t_handler); + + spp_msg_api_set_handler(SPP_API_CNAT_V4_DEBUG_OUT2IN_PUBLIC_ADDR, + spp_api_cnat_v4_debug_out2in_public_addr_t_handler); + + spp_msg_api_set_handler(SPP_API_CNAT_CLEAR_DB_REQUEST, + spp_api_cnat_clear_db_request_t_handler); + + spp_msg_api_set_handler(SPP_API_CNAT_GENERIC_COMMAND_REQUEST, + spp_api_cnat_generic_command_request_t_handler); + + spp_msg_api_set_handler(SPP_API_CNAT_P2MP_DEBUG_REQUEST, + spp_api_cnat_p2mp_debug_request_t_handler); + + spp_msg_api_set_handler(SPP_API_NAT64_CLEAR_DB_REQUEST, + spp_api_nat64_clear_db_request_t_handler); + + spp_msg_api_set_handler(SPP_API_DS_LITE_CLEAR_DB_REQUEST, + spp_api_ds_lite_clear_db_request_t_handler); + + return 0; +} + +/* +************************ +* spp_api_cnat_get_cgn_db_summary +* This is for finding out the per core CPU users and utilization +************************ +*/ + +void spp_api_cnat_get_cgn_db_summary +(spp_api_cnat_generic_command_request_t *mp) +{ + u32 total_db_entries, total_free_entries, used_entries; + u32 i = 0; + cnat_generic_command_resp *mp_resp; + u32 *result_array; + + /* + * Request structure is used to send the response + */ + mp_resp = (cnat_generic_command_resp *) mp; + result_array = (u32 *) (&(mp_resp->raw_data[0])); + + /* + * Find entries free and used in main-db + */ + total_db_entries = vec_len(cnat_main_db); + total_free_entries = db_free_entry(cnat_main_db); + used_entries = total_db_entries - total_free_entries; + + *(result_array + i++) = spp_host_to_net_byte_order_32(total_db_entries); + *(result_array + i++) = spp_host_to_net_byte_order_32(used_entries); + *(result_array + i++) = spp_host_to_net_byte_order_32(total_free_entries); + + /* + * Find entries free and used in user-db + */ + total_db_entries = vec_len(cnat_user_db); + total_free_entries = db_free_entry(cnat_user_db); + used_entries = total_db_entries - total_free_entries; + + *(result_array + i++) = spp_host_to_net_byte_order_32(total_db_entries); + *(result_array + i++) = spp_host_to_net_byte_order_32(used_entries); + *(result_array + i++) = spp_host_to_net_byte_order_32(total_free_entries); + + mp_resp->num_bytes = spp_host_to_net_byte_order_32(i*sizeof(u32)); +} + +SPP_INIT_FUNCTION(cnat_debug_init); +#endif /* TOBE_PORTED */ diff --git a/vnet/vnet/vcgn/cnat_global.c b/vnet/vnet/vcgn/cnat_global.c new file mode 100644 index 00000000000..9ab89eeaf05 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_global.c @@ -0,0 +1,79 @@ +/* + *------------------------------------------------------------------ + * cnat_global.c - global variables + * + * Copyright (c) 2008-2009, 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +/* gloable variables */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "dslite_defs.h" +#include "tcp_header_definitions.h" +u32 cnat_current_time; +u8 nfv9_configured = 0; +/* ctx/sf alloc error counters */ +u32 null_enq_pkt; +u32 null_deq_pkt; + +u32 null_enq_ctx; +u32 null_deq_ctx; + +u32 null_enq_wqe; +u32 null_deq_wqe; + +u32 ctx_alloc_errs; +u32 sf_alloc_errs; + +u32 rcv_pkt_errs; + +/* TOBE_PORTED : Remove following once we bring DSLite */ +u32 dslite_config_debug_level = 1; +u32 dslite_data_path_debug_level = 1; +u32 dslite_defrag_debug_level = 1; +u32 dslite_debug_level = 1; + +dslite_table_entry_t *dslite_table_db_ptr; + +/* + * ipv4_decr_ttl_n_calc_csum() + * - It decrements the TTL and calculates the incremental IPv4 checksum + */ + +/* TOBE_PORTED: Following is in cnat_util.c */ +always_inline +void ipv4_decr_ttl_n_calc_csum(ipv4_header *ipv4) +{ + u32 checksum; + u16 old; + u16 ttl; + + ttl = ipv4->ttl; + old = clib_net_to_host_u16(ttl); + + /* Decrement TTL */ + ipv4->ttl--; + + /* Calculate incremental checksum */ + checksum = old + (~clib_net_to_host_u16(ttl) & 0xFFFF); + checksum += clib_net_to_host_u16(ipv4->checksum); + checksum = (checksum & 0xFFFF) + (checksum >> 16); + ipv4->checksum = clib_host_to_net_u32(checksum + (checksum >> 16)); +} + diff --git a/vnet/vnet/vcgn/cnat_global.h b/vnet/vnet/vcgn/cnat_global.h new file mode 100644 index 00000000000..823a47974d4 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_global.h @@ -0,0 +1,87 @@ +/* + *------------------------------------------------------------------ + * cnat_global.h - global definition and variables + * to be used by non cnat files + * + * Copyright (c) 2007-2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_GLOBAL_H__ +#define __CNAT_GLOBAL_H__ + +/* gloable variables */ + +extern u8 cnat_db_init_done; +extern u32 cnat_current_time; +extern u64 in2out_drops_port_limit_exceeded; +extern u64 in2out_drops_system_limit_reached; +extern u64 in2out_drops_resource_depletion; +extern u64 no_translation_entry_drops; +extern u8 nfv9_configured; +extern u32 translation_create_count; +extern u32 translation_create_rate; + +extern u32 translation_delete_count; +extern u32 translation_delete_rate; + +extern u32 in2out_forwarding_count; +extern u32 in2out_forwarding_rate; + +extern u32 out2in_forwarding_count; +extern u32 out2in_forwarding_rate; + +extern u32 total_address_pool_allocated; + +extern u32 nat44_active_translations; + +#if 1 //DSLITE_DEF +extern u32 dslite_translation_create_rate; +extern u32 dslite_translation_delete_rate; +extern u32 dslite_translation_create_count; +extern u32 dslite_in2out_forwarding_count; +extern u32 dslite_in2out_forwarding_count; +extern u32 dslite_out2in_forwarding_rate; +#endif +/* sf/ctx allocation error collection declarations */ +#define COLLECT_FREQ_FACTOR 100 +#define NUM_SECONDS_TO_WAIT 10 +#define COUNTER_BUFFER_SIZE 25 + +extern u32 null_enq_pkt; +extern u32 null_deq_pkt; + +extern u32 null_enq_ctx; +extern u32 null_deq_ctx; + +extern u32 null_enq_wqe; +extern u32 null_deq_wqe; + +extern u32 ctx_alloc_errs; +extern u32 sf_alloc_errs; + +extern u32 rcv_pkt_errs; + +struct counter_array_t { + u32 sf_error_counter; + u32 ctx_error_counter; + u32 timestamp; +} counter_array_t; + +#define COUNTER_BUFFER_SIZE 25 +struct counter_array_t err_cnt_arr[COUNTER_BUFFER_SIZE]; + +//#define DISABLE_ICMP_THROTTLE_FOR_DEBUG_PURPOSE + +#endif /*__CNAT_GLOBAL_H__*/ diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp.h b/vnet/vnet/vcgn/cnat_ipv4_icmp.h new file mode 100644 index 00000000000..69505a0364f --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp.h @@ -0,0 +1,51 @@ +/* + *----------------------------------------------------------------------------- + * + * Filename: cnat_ipv4_icmp.h + * + * Description: common functions for icmp node + * + * Assumptions and Constraints: + * + * Copyright (c) 2000-2009, 2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *----------------------------------------------------------------------------- + */ + +#ifndef __CNAT_IPV4_ICMP_H__ +#define __CNAT_IPV4_ICMP_H__ + +#include "tcp_header_definitions.h" +#include "cnat_db.h" +#include "cnat_v4_functions.h" +#include "cnat_global.h" +#include "cnat_config.h" + +typedef struct { + icmp_v4_t *icmp; + ipv4_header *em_ip; + u16 *em_port; + u16 *em_l4_checksum; +} icmp_em_ip_info; + +inline void swap_ip_src_icmp_id(ipv4_header *ip, + icmp_v4_t *icmp, + cnat_main_db_entry_t *db, + u16 vrf); + +inline void swap_ip_dst_icmp_id(ipv4_header *ip, + icmp_v4_t *icmp, + cnat_main_db_entry_t *db, + u16 vrf); + +#endif /* __CNAT_IPV4_ICMP_H__ */ diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c new file mode 100644 index 00000000000..218d7e538fa --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c @@ -0,0 +1,476 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_icmp_error_inside_input.c - cnat_ipv4_icmp_error_inside_input node pipeline stage functions + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_ipv4_icmp.h" + +#define foreach_cnat_ipv4_icmp_e_inside_input_error \ +_(CNAT_V4_ICMP_E_I2O_T_PKT, "cnat v4 icmp_e i2o packet transmit") \ +_(CNAT_V4_ICMP_E_I2O_D_PKT, "cnat v4 icmp_e i2o packet drop") \ +_(CNAT_V4_ICMP_E_I2O_TTL_DROP, "cnat v4 icmp_e i2o ttl drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_icmp_e_inside_input_error +#undef _ + CNAT_IPV4_ICMP_E_INSIDE_INPUT_N_ERROR, +} cnat_ipv4_icmp_e_inside_input_t; + +static char * cnat_ipv4_icmp_e_inside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_icmp_e_inside_input_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_icmp_e_inside_input_main_t; + +typedef enum { + CNAT_V4_ICMP_E_I2O_T, + CNAT_V4_ICMP_E_I2O_D, + CNAT_V4_ICMP_E_I2O_NEXT, +} cnat_ipv4_icmp_e_inside_input_next_t; + +cnat_ipv4_icmp_e_inside_input_main_t cnat_ipv4_icmp_e_inside_input_main; +vlib_node_registration_t cnat_ipv4_icmp_e_inside_input_node; + +#define NSTAGES 5 + +inline void swap_ip_src_emip_dst(ipv4_header *ip, + icmp_em_ip_info *icmp_info, + cnat_main_db_entry_t *db, u16 vrf) +{ + icmp_v4_t *icmp; + ipv4_header *em_ip; + u16 *em_port; + u32 old_ip; + u16 old_port; + u16 old_ip_checksum; + + /* + * declear variable + */ + CNAT_UPDATE_L3_CHECKSUM_DECLARE + CNAT_UPDATE_ICMP_ERR_CHECKSUM_DECLARE + + /* + * fix inner layer ip & l4 checksum + */ + em_ip = icmp_info->em_ip; + em_port = icmp_info->em_port; + + CNAT_UPDATE_L3_CHECKSUM(((u16)(db->in2out_key.k.ipv4)), + 
((u16)(db->in2out_key.k.ipv4 >> 16)), + (clib_net_to_host_u16(em_ip->checksum)), + ((u16)(db->out2in_key.k.ipv4)), + ((u16)(db->out2in_key.k.ipv4 >> 16))) + + old_ip = clib_net_to_host_u32(em_ip->dest_addr); + old_port = clib_net_to_host_u16(*em_port); + old_ip_checksum = clib_net_to_host_u16(em_ip->checksum); + + em_ip->dest_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + em_ip->checksum = + clib_host_to_net_u16(new_l3_c); + *em_port = + clib_host_to_net_u16(db->out2in_key.k.port); + + /* + * fix outter layer ip & icmp checksum + */ + icmp = icmp_info->icmp; + CNAT_UPDATE_ICMP_ERR_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (old_port), + (old_ip_checksum), + (clib_net_to_host_u16(icmp->checksum)), + ((u16)(db->out2in_key.k.ipv4 & 0xffff)), + ((u16)(db->out2in_key.k.ipv4 >> 16)), + ((u16)(db->out2in_key.k.port)), + ((u16)(new_l3_c))) + + icmp->checksum = + clib_host_to_net_u16(new_icmp_c); + + old_ip = clib_net_to_host_u32(ip->src_addr); + + ip->src_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (clib_net_to_host_u16(ip->checksum)), + ((u16)(db->out2in_key.k.ipv4)), + ((u16)(db->out2in_key.k.ipv4 >> 16))) + ip->checksum = + clib_host_to_net_u16(new_l3_c); + +#if 0 + if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) { + /* + * fix inner layer ip & l4 checksum + */ + em_snat_ip = icmp_info->em_ip; + em_snat_port = icmp_info->em_port; + + old_ip = spp_net_to_host_byte_order_32(&(em_snat_ip->src_addr)); + old_port = spp_net_to_host_byte_order_16(em_snat_port); + old_ip_checksum = spp_net_to_host_byte_order_16(&(em_snat_ip->checksum)); + direction = 0; + if(cnat_static_dest_db_get_translation(em_snat_ip->src_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) { + old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip)), + ((u16)(old_ip >> 16)), + (spp_net_to_host_byte_order_16(&(em_snat_ip->checksum))), + ((u16)(old_postmap_ip)), + ((u16)(old_postmap_ip >> 16))) + em_snat_ip->src_addr = postmap_ip; + em_snat_ip->checksum = + spp_host_to_net_byte_order_16(new_l3_c); + + /* + * fix outter layer ip & icmp checksum + */ + icmp = icmp_info->icmp; + CNAT_UPDATE_ICMP_ERR_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (old_port), + (old_ip_checksum), + (spp_net_to_host_byte_order_16(&(icmp->checksum))), + ((u16)(old_postmap_ip & 0xffff)), + ((u16)(old_postmap_ip >> 16)), + ((u16)(old_port)), + ((u16)(new_l3_c))) + + icmp->checksum = + spp_host_to_net_byte_order_16(new_icmp_c); + + } + } + + if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) { + direction = 0; + if(cnat_static_dest_db_get_translation(ip->dest_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) { + + old_ip = spp_net_to_host_byte_order_32(&(ip->dest_addr)); + old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (spp_net_to_host_byte_order_16(&(ip->checksum))), + ((u16)(old_postmap_ip & 0xFFFF)), + ((u16)(old_postmap_ip >> 16))) + ip->dest_addr = postmap_ip; + + ip->checksum = + clib_host_to_net_u16(new_l3_c); + } + } +#endif /* if 0 */ + +} + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. 
+ */ +#define stage0 generic_stage0 + + +static inline void +stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + u64 a, b, c; + u32 bucket; + u8 *prefetch_target; + + vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index); + ipv4_header *ip = vlib_buffer_get_current (b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + ipv4_header *em_ip = (ipv4_header*)((u8*)icmp + 8); /* embedded pkt's v4 hdr */ + u8 em_ip_hdr_len = (em_ip->version_hdr_len_words & 0xf) << 2; + + u64 tmp = 0; + u32 protocol = CNAT_ICMP; + + /* Check L4 header for embedded packet */ + if (em_ip->protocol == TCP_PROT) { + tcp_hdr_type *tcp = (tcp_hdr_type*)((u8 *)em_ip + em_ip_hdr_len); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16(tcp->dest_port); + protocol = CNAT_TCP; + + } else if (em_ip->protocol == UDP_PROT) { + udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8 *)em_ip + em_ip_hdr_len); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16(udp->dest_port); + protocol = CNAT_UDP; + + } else { + icmp_v4_t *icmp = (icmp_v4_t*)((u8 *)em_ip + em_ip_hdr_len); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16(icmp->identifier); + + if (PREDICT_FALSE((icmp->type != ICMPV4_ECHOREPLY) && + (icmp->type != ICMPV4_ECHO))) { + /* + * Try to set invalid protocol for these cases, so that + * hash lookup does not return valid main_db. This approach + * may optimize the regular cases with valid protocols + * as it avoids one more check for regular cases in stage3 + */ + protocol = CNAT_INVALID_PROTO; + } + } + + tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = + clib_net_to_host_u32(em_ip->dest_addr); + + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + protocol) + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48; + + CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_in2out_hash[bucket]); + vnet_buffer(b0)->vcgn_uii.bucket = bucket; + + /* Prefetch the hash bucket */ + CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD); +} + +#define SPP_LOG2_CACHE_LINE_BYTES 6 +#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES) + +static inline void +stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + uword prefetch_target0, prefetch_target1; + u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket; + + /* read the hash bucket */ + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket + = cnat_in2out_hash[bucket].next; + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. 
+ */ + prefetch_target0 = (uword)(cnat_main_db + db_index); + CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD); + /* Just beyond DB key #2 */ + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + /* If the targets are in different lines, do the second prefetch */ + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD); + } + } +} + +static inline void +stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + cnat_main_db_entry_t *db; + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. After that, maybe + * not so much... + */ + do { + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->in2out_key.key64 == + vnet_buffer(b0)->vcgn_uii.key.key64)) { + break; + } + db_index = db->in2out_hash.next; + } while (db_index != EMPTY); + + /* Stick the answer back into the pipeline context structure */ + vnet_buffer(b0)->vcgn_uii.bucket = db_index; + } +} + + + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + int disposition = CNAT_V4_ICMP_E_I2O_T; + int counter = CNAT_V4_ICMP_E_I2O_T_PKT; + + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + ipv4_header *em_ip = (ipv4_header*)((u8*)icmp + 8); /* embedded pkt's v4 hdr */ + u8 em_ip_hdr_len = (em_ip->version_hdr_len_words & 0xf) << 2; + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_icmp_e_inside_input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + cnat_main_db_entry_t *db = NULL; + icmp_em_ip_info icmp_info; + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + if (PREDICT_FALSE(ip->ttl <= 1)) { + /* + * As it is ICMP error packet with TTL <= 1, + * let's drop the packet (no need to genereate + * another ICMP error). + */ + + disposition = CNAT_V4_ICMP_E_I2O_D; + counter = CNAT_V4_ICMP_E_I2O_TTL_DROP; + + goto drop_pkt; + } + } + + if (PREDICT_TRUE(db_index != EMPTY)) { + icmp_info.em_ip = em_ip; + icmp_info.icmp = icmp; + //icmp_info.em_port = vnet_buffer(b0)->vcgn_uii.key.k.port; + + /* Note: This could have been done in stage1 itself, + * but we need to introduce one u16 * in vnet_buffer_opaque_t + * Since this flow is expected to be very rare in actual + * deployment scenario, we may afford to do these steps here + * as well. Lets confirm during core review. */ + if (em_ip->protocol == TCP_PROT) { + tcp_hdr_type *tcp = (tcp_hdr_type*)((u8 *)em_ip + em_ip_hdr_len); + icmp_info.em_port = &(tcp->dest_port); + } else if (em_ip->protocol == UDP_PROT) { + udp_hdr_type_t *udp = (udp_hdr_type_t *) + ((u8 *)em_ip + em_ip_hdr_len); + icmp_info.em_port = &(udp->dest_port); + } else { + icmp_v4_t *icmp_inner = (icmp_v4_t*)((u8 *)em_ip + em_ip_hdr_len); + icmp_info.em_port = &(icmp_inner->identifier); + } + + db = cnat_main_db + db_index; + /* + * 1. 
update dst addr:dst port of embedded ip pkt + * update src addr of icmp pkt + * 2. fix udp/tcp/ip checksum of embedded pkt + * fix icmp, ip check of icmp pkt + * don need to update the timer + */ + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT BEFORE\n"); + print_icmp_pkt(ip); + } + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + swap_ip_src_emip_dst(ip, &icmp_info, + db, db->in2out_key.k.vrf); + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT AFTER\n"); + print_icmp_pkt(ip); + } + in2out_forwarding_count++; + + } else { + disposition = CNAT_V4_ICMP_E_I2O_D; + counter = CNAT_V4_ICMP_E_I2O_D_PKT; + } + +drop_pkt: + + em->counters[node_counter_base_index + counter] += 1; + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_icmp_e_inside_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (cnat_ipv4_icmp_e_inside_input_node) = { + .function = cnat_ipv4_icmp_e_inside_input_node_fn, + .name = "vcgn-v4-icmp-e-i2o", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_icmp_e_inside_input_error_strings), + .error_strings = cnat_ipv4_icmp_e_inside_input_error_strings, + + .n_next_nodes = CNAT_V4_ICMP_E_I2O_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_V4_ICMP_E_I2O_T] = "ip4-input", + [CNAT_V4_ICMP_E_I2O_D] = "error-drop", + }, +}; + +clib_error_t *cnat_ipv4_icmp_e_inside_input_init (vlib_main_t *vm) +{ + cnat_ipv4_icmp_e_inside_input_main_t * mp = &cnat_ipv4_icmp_e_inside_input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_icmp_e_inside_input_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c new file mode 100644 index 00000000000..f25f4d022c7 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c @@ -0,0 +1,452 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_icmp_error_outside_input.c - cnat_ipv4_icmp_error_outside_input node pipeline stage functions + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_ipv4_icmp.h" + +#define foreach_cnat_ipv4_icmp_e_outside_input_error \ +_(CNAT_V4_ICMP_E_O2I_T_PKT, "cnat v4 icmp_e o2i packet transmit") \ +_(CNAT_V4_ICMP_E_O2I_D_PKT, "cnat v4 icmp_e o2i packet drop") \ +_(CNAT_V4_ICMP_E_O2I_TTL_DROP, "cnat v4 icmp_e o2i ttl drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_icmp_e_outside_input_error +#undef _ + CNAT_IPV4_ICMP_E_OUTSIDE_INPUT_N_ERROR, +} cnat_ipv4_icmp_e_outside_input_t; + +static char * cnat_ipv4_icmp_e_outside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_icmp_e_outside_input_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_icmp_e_outside_input_main_t; + +typedef enum { + CNAT_V4_ICMP_E_O2I_T, + CNAT_V4_ICMP_E_O2I_D, + CNAT_V4_ICMP_E_O2I_NEXT, +} cnat_ipv4_icmp_e_outside_input_next_t; + +cnat_ipv4_icmp_e_outside_input_main_t cnat_ipv4_icmp_e_outside_input_main; +vlib_node_registration_t cnat_ipv4_icmp_e_outside_input_node; + +#define NSTAGES 5 + +inline void swap_ip_dst_emip_src(ipv4_header *ip, + icmp_em_ip_info *icmp_info, + cnat_main_db_entry_t *db, u16 vrf) +{ + icmp_v4_t *icmp; + ipv4_header *em_ip; + u16 *em_port; + u32 old_ip; + u16 old_port; + u16 old_ip_checksum; + + /* + * declear variable + */ + CNAT_UPDATE_L3_CHECKSUM_DECLARE + CNAT_UPDATE_ICMP_ERR_CHECKSUM_DECLARE + + /* + * fix inner layer ip & l4 checksum + */ + em_ip = icmp_info->em_ip; + em_port = icmp_info->em_port; + + CNAT_UPDATE_L3_CHECKSUM(((u16)(db->out2in_key.k.ipv4)), + ((u16)(db->out2in_key.k.ipv4 >> 16)), + (clib_net_to_host_u16(em_ip->checksum)), + ((u16)(db->in2out_key.k.ipv4)), + ((u16)(db->in2out_key.k.ipv4 >> 16))) + + old_ip = clib_net_to_host_u32(em_ip->src_addr); + old_port = clib_net_to_host_u16(*em_port); + old_ip_checksum = clib_net_to_host_u16(em_ip->checksum); + + em_ip->src_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + em_ip->checksum = + clib_host_to_net_u16(new_l3_c); + *em_port = + clib_host_to_net_u16(db->in2out_key.k.port); + + /* + * fix outter layer ip & icmp checksum + */ + icmp = icmp_info->icmp; + CNAT_UPDATE_ICMP_ERR_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (old_port), + (old_ip_checksum), + (clib_net_to_host_u16(icmp->checksum)), + ((u16)(db->in2out_key.k.ipv4 & 0xffff)), + ((u16)(db->in2out_key.k.ipv4 >> 16)), + ((u16)(db->in2out_key.k.port)), + ((u16)(new_l3_c))) + + icmp->checksum = + clib_host_to_net_u16(new_icmp_c); + + old_ip = clib_net_to_host_u32(ip->dest_addr); + + ip->dest_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (clib_net_to_host_u16(ip->checksum)), + ((u16)(db->in2out_key.k.ipv4)), + ((u16)(db->in2out_key.k.ipv4 >> 16))) + ip->checksum = + clib_host_to_net_u16(new_l3_c); + +#if 0 + if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) { + /* + * fix inner layer ip & l4 checksum + */ + em_snat_ip = icmp_info->em_ip; + em_snat_port = icmp_info->em_port; + + old_ip = spp_net_to_host_byte_order_32(&(em_snat_ip->dest_addr)); + old_port = spp_net_to_host_byte_order_16(em_snat_port); + old_ip_checksum = spp_net_to_host_byte_order_16(&(em_snat_ip->checksum)); + direction = 1; + 
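Both swap routines in these ICMP-error nodes rely on incremental checksum updates: the CNAT_UPDATE_L3_CHECKSUM and CNAT_UPDATE_ICMP_ERR_CHECKSUM macros fold the old 16-bit words of the rewritten fields out of the existing checksum and fold the new ones in, instead of recomputing over the whole header. The standalone helper below is not part of the committed file; it sketches that RFC 1624 style update for a single 32-bit address change, and the function name and host-order convention are illustrative only.

    /* Illustrative only: RFC 1624 incremental update, HC' = ~(~HC + ~m + m'),
     * applied per 16-bit word for one rewritten IPv4 address. */
    static inline u16
    incr_l3_checksum (u16 old_csum, u32 old_addr, u32 new_addr)
    {
        u32 sum;

        sum  = (u16) ~old_csum;                     /* ~HC              */
        sum += (u16) ~(u16)(old_addr & 0xffff);     /* ~m, low half     */
        sum += (u16) ~(u16)(old_addr >> 16);        /* ~m, high half    */
        sum += (u16) (new_addr & 0xffff);           /* m', low half     */
        sum += (u16) (new_addr >> 16);              /* m', high half    */
        while (sum >> 16)                           /* end-around carry */
            sum = (sum & 0xffff) + (sum >> 16);
        return (u16) ~sum;                          /* HC'              */
    }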
if(cnat_static_dest_db_get_translation(em_snat_ip->dest_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) { + old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip)), + ((u16)(old_ip >> 16)), + (spp_net_to_host_byte_order_16(&(em_snat_ip->checksum))), + ((u16)(old_postmap_ip)), + ((u16)(old_postmap_ip >> 16))) + em_snat_ip->dest_addr = postmap_ip; + em_snat_ip->checksum = + spp_host_to_net_byte_order_16(new_l3_c); + + /* + * fix outter layer ip & icmp checksum + */ + icmp = icmp_info->icmp; + CNAT_UPDATE_ICMP_ERR_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (old_port), + (old_ip_checksum), + (spp_net_to_host_byte_order_16(&(icmp->checksum))), + ((u16)(old_postmap_ip & 0xffff)), + ((u16)(old_postmap_ip >> 16)), + ((u16)(old_port)), + ((u16)(new_l3_c))) + + icmp->checksum = + spp_host_to_net_byte_order_16(new_icmp_c); + + } + } + + if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) { + direction = 1; + if(cnat_static_dest_db_get_translation(ip->src_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) { + CNAT_UPDATE_L3_CHECKSUM_DECLARE + + old_ip = spp_net_to_host_byte_order_32(&(ip->src_addr)); + old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (spp_net_to_host_byte_order_16(&(ip->checksum))), + ((u16)(old_postmap_ip & 0xFFFF)), + ((u16)(old_postmap_ip >> 16))) + ip->checksum = + spp_host_to_net_byte_order_16(new_l3_c); + ip->src_addr = postmap_ip; + } + } +#endif /* if 0 */ +} + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. + */ +#define stage0 generic_stage0 + + +static inline void +stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + u64 a, b, c; + u32 bucket; + u8 *prefetch_target; + + vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index); + ipv4_header *ip = vlib_buffer_get_current (b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + ipv4_header *em_ip = (ipv4_header*)((u8*)icmp + 8); /* embedded pkt's v4 hdr */ + u8 em_ip_hdr_len = (em_ip->version_hdr_len_words & 0xf) << 2; + + u64 tmp = 0; + u32 protocol = CNAT_ICMP; + + /* Check L4 header for embedded packet */ + if (em_ip->protocol == TCP_PROT) { + tcp_hdr_type *tcp = (tcp_hdr_type*)((u8 *)em_ip + em_ip_hdr_len); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16(tcp->src_port); + protocol = CNAT_TCP; + + } else if (em_ip->protocol == UDP_PROT) { + udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8 *)em_ip + em_ip_hdr_len); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16(udp->src_port); + protocol = CNAT_UDP; + + } else { + icmp_v4_t *icmp = (icmp_v4_t*)((u8 *)em_ip + em_ip_hdr_len); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16(icmp->identifier); + + if (PREDICT_FALSE((icmp->type != ICMPV4_ECHOREPLY) && + (icmp->type != ICMPV4_ECHO))) { + /* + * Try to set invalid protocol for these cases, so that + * hash lookup does not return valid main_db. 
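Editor's note: stage1 above packs the (address, port/identifier, VRF) triple into a single 64-bit value before hashing it down to a bucket index. A simplified sketch of that packing, assuming a caller-supplied mask; the real reduction is done by CNAT_V4_GET_HASH, which is defined elsewhere, so a trivial xor-fold stands in for it here:

#include <stdint.h>

/* Pack address (bits 0..31), port (32..47) and vrf (48..63) into one
 * 64-bit lookup key, then reduce it to a hash bucket index. */
static uint32_t
cnat_key_to_bucket (uint32_t ipv4, uint16_t port, uint16_t vrf,
                    uint32_t hash_mask /* e.g. CNAT_MAIN_HASH_MASK */)
{
    uint64_t key = (uint64_t) ipv4
                 | ((uint64_t) port << 32)
                 | ((uint64_t) vrf  << 48);

    /* placeholder mix: xor-fold the key down to 32 bits */
    uint32_t h = (uint32_t) (key ^ (key >> 32));
    return h & hash_mask;
}

For ICMP errors the "port" slot is filled from the embedded packet (inner TCP/UDP source port, or the inner ICMP identifier), which is why the protocol sniffing above happens before the key is built.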
This approach + * may optimize the regular cases with valid protocols + * as it avoids one more check for regular cases in stage3 + */ + protocol = CNAT_INVALID_PROTO; + } + } + + tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = + clib_net_to_host_u32(em_ip->src_addr); + + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + protocol) + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48; + + CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_out2in_hash[bucket]); + vnet_buffer(b0)->vcgn_uii.bucket = bucket; + + /* Prefetch the hash bucket */ + CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD); +} + + +#define SPP_LOG2_CACHE_LINE_BYTES 6 +#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES) + +static inline void +stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + uword prefetch_target0, prefetch_target1; + u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket; + + /* read the hash bucket */ + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket + = cnat_out2in_hash[bucket].next; + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. + */ + prefetch_target0 = (uword)(cnat_main_db + db_index); + CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD); + /* Just beyond DB key #2 */ + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + /* If the targets are in different lines, do the second prefetch */ + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD); + } + } +} + + +static inline void +stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + cnat_main_db_entry_t *db; + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. After that, maybe + * not so much... 
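Editor's note: stage3 resolves hash collisions by walking the bucket's singly linked chain until the packed 64-bit key matches. A condensed, self-contained version of that walk; the struct and the EMPTY value here are illustrative stand-ins for the cnat_main_db definitions:

#include <stdint.h>

#define DEMO_EMPTY 0xffffffffu      /* "no entry" sentinel (assumed value) */

typedef struct {
    uint64_t key64;                 /* packed (vrf, port, ipv4) flow key */
    uint32_t next;                  /* next index in the collision chain */
} demo_db_entry_t;

/* Walk the chain starting at head; return the matching entry index,
 * or DEMO_EMPTY if the flow is unknown. */
static uint32_t
demo_db_lookup (demo_db_entry_t *db, uint32_t head, uint64_t key64)
{
    uint32_t i = head;
    while (i != DEMO_EMPTY) {
        if (db[i].key64 == key64)
            return i;
        i = db[i].next;
    }
    return DEMO_EMPTY;
}

Splitting the work across stages lets stage2 prefetch the head entry while other packets are processed, so only chained (colliding) entries pay a cache-miss penalty in stage3.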
+ */ + do { + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->out2in_key.key64 == + vnet_buffer(b0)->vcgn_uii.key.key64)) { + break; + } + db_index = db->out2in_hash.next; + } while (db_index != EMPTY); + + /* Stick the answer back into the pipeline context structure */ + vnet_buffer(b0)->vcgn_uii.bucket = db_index; + } +} + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + int disposition = CNAT_V4_ICMP_E_O2I_T; + int counter = CNAT_V4_ICMP_E_O2I_T_PKT; + + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + ipv4_header *em_ip = (ipv4_header*)((u8*)icmp + 8); /* embedded pkt's v4 hdr */ + u8 em_ip_hdr_len = (em_ip->version_hdr_len_words & 0xf) << 2; + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_icmp_e_outside_input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + cnat_main_db_entry_t *db = NULL; + icmp_em_ip_info icmp_info; + + + if (PREDICT_TRUE(db_index != EMPTY)) { + + icmp_info.em_ip = em_ip; + icmp_info.icmp = icmp; + + /* Note: This could have been done in stage1 itself, + * but we need to introduce one u16 * in vnet_buffer_opaque_t + * Since this flow is expected to be very rare in actual + * deployment scenario, we may afford to do these steps here + * as well. Lets confirm during core review. */ + + if (em_ip->protocol == TCP_PROT) { + tcp_hdr_type *tcp = (tcp_hdr_type*)((u8 *)em_ip + em_ip_hdr_len); + icmp_info.em_port = &(tcp->src_port); + } else if (em_ip->protocol == UDP_PROT) { + udp_hdr_type_t *udp = (udp_hdr_type_t *) + ((u8 *)em_ip + em_ip_hdr_len); + icmp_info.em_port = &(udp->src_port); + } else { + icmp_v4_t *icmp_inner = (icmp_v4_t*)((u8 *)em_ip + em_ip_hdr_len); + icmp_info.em_port = &(icmp_inner->identifier); + } + + db = cnat_main_db + db_index; + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT BEFORE\n"); + print_icmp_pkt(ip); + } + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + swap_ip_dst_emip_src(ip, &icmp_info, + db, db->in2out_key.k.vrf); + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT AFTER\n"); + print_icmp_pkt(ip); + } + + } else { + disposition = CNAT_V4_ICMP_E_O2I_D; + counter = CNAT_V4_ICMP_E_O2I_D_PKT; + } + + em->counters[node_counter_base_index + counter] += 1; + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_icmp_e_outside_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (cnat_ipv4_icmp_e_outside_input_node) = { + .function = cnat_ipv4_icmp_e_outside_input_node_fn, + .name = "vcgn-v4-icmp-e-o2i", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_icmp_e_outside_input_error_strings), + .error_strings = cnat_ipv4_icmp_e_outside_input_error_strings, + + .n_next_nodes = CNAT_V4_ICMP_E_O2I_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_V4_ICMP_E_O2I_T] = "ip4-input", + [CNAT_V4_ICMP_E_O2I_D] = "error-drop", + }, +}; + +clib_error_t *cnat_ipv4_icmp_e_outside_input_init (vlib_main_t *vm) +{ + cnat_ipv4_icmp_e_outside_input_main_t * mp = &cnat_ipv4_icmp_e_outside_input_main; + + 
mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_icmp_e_outside_input_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c new file mode 100644 index 00000000000..1b9f0266d71 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c @@ -0,0 +1,404 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_icmp_query_inside_input.c - cnat_ipv4_icmp_query_inside_input node pipeline stage functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_ipv4_icmp.h" + +#define foreach_cnat_ipv4_icmp_q_inside_input_error \ +_(CNAT_V4_ICMP_Q_I2O_T_PKT, "cnat v4 icmp_q i2o packet transmit") \ +_(CNAT_V4_ICMP_Q_I2O_MISS_PKT, "cnat v4 icmp_q i2o db miss") \ +_(CNAT_V4_ICMP_Q_I2O_TTL_GEN, "cnat v4 icmp_q i2o ttl generate") \ +_(CNAT_V4_ICMP_Q_I2O_TTL_DROP, "cnat v4 icmp_q i2o ttl drop") \ +_(CNAT_V4_ICMP_Q_I2O_NO_SESSION_DROP, "cnat v4 icmp_q i2o no session drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_icmp_q_inside_input_error +#undef _ + CNAT_IPV4_ICMP_Q_INSIDE_INPUT_N_ERROR, +} cnat_ipv4_icmp_q_inside_input_t; + +static char * cnat_ipv4_icmp_q_inside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_icmp_q_inside_input_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_icmp_q_inside_input_main_t; + +typedef enum { + CNAT_V4_ICMP_Q_I2O_T, + CNAT_V4_ICMP_Q_I2O_E, + CNAT_V4_ICMP_Q_I2O_D, + CNAT_V4_ICMP_Q_I2O_NEXT, +} cnat_ipv4_icmp_q_inside_input_next_t; + +cnat_ipv4_icmp_q_inside_input_main_t cnat_ipv4_icmp_q_inside_input_main; +vlib_node_registration_t cnat_ipv4_icmp_q_inside_input_node; + +#define NSTAGES 5 + +inline void swap_ip_src_icmp_id(ipv4_header *ip, + icmp_v4_t *icmp, + cnat_main_db_entry_t *db, u16 vrf) +{ +#if 0 + u32 postmap_ip; + u8 direction; + u32 old_ip; + u32 old_postmap_ip; + + + if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) { + direction = 0; + if(cnat_static_dest_db_get_translation(ip->dest_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) { + CNAT_UPDATE_L3_CHECKSUM_DECLARE + + old_ip = spp_net_to_host_byte_order_32(&(ip->dest_addr)); + old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (spp_net_to_host_byte_order_16(&(ip->checksum))), + ((u16)(old_postmap_ip & 0xFFFF)), + ((u16)(old_postmap_ip >> 16))) + ip->dest_addr = postmap_ip; + + ip->checksum = + spp_host_to_net_byte_order_16(new_l3_c); + } + } +#endif /* if 0 */ + /* + * declare variable + */ 
+ CNAT_UPDATE_L3_L4_CHECKSUM_DECLARE + /* + * calculate checksum + */ + CNAT_UPDATE_L3_ICMP_CHECKSUM(((u16)(db->in2out_key.k.ipv4)), + ((u16)(db->in2out_key.k.ipv4 >> 16)), + (db->in2out_key.k.port), + (clib_net_to_host_u16(ip->checksum)), + (clib_net_to_host_u16(icmp->checksum)), + ((u16)(db->out2in_key.k.ipv4)), + ((u16)(db->out2in_key.k.ipv4 >> 16)), + (db->out2in_key.k.port)) + //set ip header + ip->src_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + ip->checksum = + clib_host_to_net_u16(new_l3_c); + + //set icmp header + icmp->identifier = + clib_host_to_net_u16(db->out2in_key.k.port); + icmp->checksum = + clib_host_to_net_u16(new_l4_c); +} + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. + */ +#define stage0 generic_stage0 + +static inline void +stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + u64 a, b, c; + u32 bucket; + u8 *prefetch_target; + + vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index); + ipv4_header *ip = vlib_buffer_get_current (b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + + u64 tmp = 0; + tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = + clib_net_to_host_u32(ip->src_addr); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16 (icmp->identifier); + + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + CNAT_ICMP) + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48; + + CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_in2out_hash[bucket]); + vnet_buffer(b0)->vcgn_uii.bucket = bucket; + + /* Prefetch the hash bucket */ + CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD); +} + +#define SPP_LOG2_CACHE_LINE_BYTES 6 +#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES) + +static inline void +stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + uword prefetch_target0, prefetch_target1; + u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket; + + /* read the hash bucket */ + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket + = cnat_in2out_hash[bucket].next; + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. 
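Editor's note: for ICMP query packets the echo identifier plays the role that the source port plays for TCP/UDP, and swap_ip_src_icmp_id above rewrites both the source address and the identifier. One subtlety worth calling out: ICMPv4 has no pseudo-header, so the address rewrite only affects the IP header checksum; the ICMP checksum changes solely because the identifier does. A hedged sketch of that second adjustment (hypothetical helper name, values taken as wire words in big-endian order):

#include <stdint.h>

/* Patch the ICMP checksum after the identifier is rewritten;
 * HC' = ~(~HC + ~old_id + new_id) with carry folds. */
static uint16_t
icmp_csum_after_id_rewrite (uint16_t icmp_csum, uint16_t old_id, uint16_t new_id)
{
    uint32_t sum = (uint16_t) ~icmp_csum;
    sum += (uint16_t) ~old_id;
    sum += new_id;
    sum = (sum & 0xffff) + (sum >> 16);
    sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t) ~sum;
}

The CNAT_UPDATE_L3_ICMP_CHECKSUM macro bundles this with the IP header update so both new_l3_c and new_l4_c come out of one pass.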
+ */ + prefetch_target0 = (uword)(cnat_main_db + db_index); + CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD); + /* Just beyond DB key #2 */ + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + /* If the targets are in different lines, do the second prefetch */ + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD); + } + } +} + +static inline void +stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + cnat_main_db_entry_t *db; + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. After that, maybe + * not so much... + */ + do { + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->in2out_key.key64 == + vnet_buffer(b0)->vcgn_uii.key.key64)) { + break; + } + db_index = db->in2out_hash.next; + } while (db_index != EMPTY); + + /* Stick the answer back into the pipeline context structure */ + vnet_buffer(b0)->vcgn_uii.bucket = db_index; + } +} + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + spp_ctx_t *ctx = (spp_ctx_t *) &vnet_buffer(b0)->vcgn_uii; + int disposition = CNAT_V4_ICMP_Q_I2O_T; + int counter = CNAT_V4_ICMP_Q_I2O_T_PKT; + + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_icmp_q_inside_input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + cnat_session_entry_t *session_db = NULL; + cnat_main_db_entry_t *db = NULL; + cnat_key_t dest_info; + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + if (PREDICT_FALSE(ip->ttl <= 1)) { + /* Try to generate ICMP error msg, as TTL is <= 1 */ + + if (icmpv4_generate_with_throttling + (ctx, ip, ctx->ru.rx.uidb_index)) { + + /* Generated ICMP */ + disposition = CNAT_V4_ICMP_Q_I2O_T; + counter = CNAT_V4_ICMP_Q_I2O_TTL_GEN; + } else { + /* Could not generated ICMP - drop the packet */ + disposition = CNAT_V4_ICMP_Q_I2O_D; + counter = CNAT_V4_ICMP_Q_I2O_TTL_DROP; + } + goto drop_pkt; + } + } + + if (PREDICT_TRUE(db_index != EMPTY)) { + db = cnat_main_db + db_index; + dest_info.k.port = 0; + dest_info.k.ipv4 = clib_net_to_host_u32(ip->dest_addr); + + if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) { + + /* No DBL support, so just update the destn and proceed */ + db->dst_ipv4 = dest_info.k.ipv4; + db->dst_port = dest_info.k.port; + goto update_pkt; + } + + if(PREDICT_FALSE(db->dst_ipv4 != dest_info.k.ipv4)) { + if(PREDICT_TRUE(db->nsessions == 1)) { + /* Handle one to 2 dest scenarion */ + dest_info.k.vrf = db->in2out_key.k.vrf; + session_db = cnat_handle_1to2_session(db, &dest_info); + + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_ICMP_Q_I2O_D; + counter = CNAT_V4_ICMP_Q_I2O_NO_SESSION_DROP; + goto drop_pkt; + } + } else if (PREDICT_FALSE(db->nsessions == 0)) { + /* Should be a static entry + * Note this 
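Editor's note: when PLATFORM_HANDLE_TTL_DECREMENT is enabled, packets arriving with TTL <= 1 get a (throttled) ICMP time-exceeded instead of being forwarded, and everything else has its TTL decremented with the header checksum patched by ipv4_decr_ttl_n_calc_csum. A sketch of the usual shortcut behind such a helper (this is the generic RFC 1141/1624 trick, not the literal body of ipv4_decr_ttl_n_calc_csum, which is defined elsewhere; 16-bit values are wire words in big-endian order):

#include <stdint.h>

/* TTL shares a 16-bit header word with the protocol field, so
 * decrementing it lowers that word by 0x0100; patch the checksum
 * for exactly that change. */
static uint16_t
csum_after_ttl_decrement (uint16_t ip_csum, uint8_t old_ttl, uint8_t protocol)
{
    uint16_t old_w = (uint16_t) ((old_ttl << 8) | protocol);
    uint16_t new_w = (uint16_t) (old_w - 0x0100);   /* TTL already known > 1 here */
    uint32_t sum = (uint16_t) ~ip_csum;
    sum += (uint16_t) ~old_w;
    sum += new_w;
    sum = (sum & 0xffff) + (sum >> 16);
    sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t) ~sum;
}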
session as the first session and log + */ + cnat_add_dest_n_log(db, &dest_info); + } else { /* Many translations exist already */ + dest_info.k.vrf = db->in2out_key.k.vrf; + /* If session already exists, + * cnat_create_session_db_entry will return the existing db + * else create a new db + * If could not create, return NULL + */ + session_db = cnat_create_session_db_entry(&dest_info, + db, TRUE); + + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_ICMP_Q_I2O_D; + counter = CNAT_V4_ICMP_Q_I2O_NO_SESSION_DROP; + goto drop_pkt; + } + } + } + +update_pkt: + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT BEFORE\n"); + print_icmp_pkt(ip); + } + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + /* + * 1. update src ipv4 addr and src icmp identifier + * 2. update ipv4 checksum and icmp checksum + */ + swap_ip_src_icmp_id(ip, icmp, db, db->in2out_key.k.vrf); + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT AFTER\n"); + print_icmp_pkt(ip); + } + + /* + * update db counter, timer + */ + + if(PREDICT_FALSE(session_db != 0)) { + CNAT_DB_TIMEOUT_RST(session_db); + } else { + CNAT_DB_TIMEOUT_RST(db); + } + db->in2out_pkts++; + in2out_forwarding_count++; + + } else { + disposition = CNAT_V4_ICMP_Q_I2O_E; + counter = CNAT_V4_ICMP_Q_I2O_MISS_PKT; + } + +drop_pkt: + + em->counters[node_counter_base_index + counter] += 1; + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_icmp_q_inside_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (cnat_ipv4_icmp_q_inside_input_node) = { + .function = cnat_ipv4_icmp_q_inside_input_node_fn, + .name = "vcgn-v4-icmp-q-i2o", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_icmp_q_inside_input_error_strings), + .error_strings = cnat_ipv4_icmp_q_inside_input_error_strings, + + .n_next_nodes = CNAT_V4_ICMP_Q_I2O_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_V4_ICMP_Q_I2O_E] = "vcgn-v4-icmp-q-i2o-e", + [CNAT_V4_ICMP_Q_I2O_T] = "ip4-input", + [CNAT_V4_ICMP_Q_I2O_D] = "error-drop", + }, +}; + +clib_error_t *cnat_ipv4_icmp_q_inside_input_init (vlib_main_t *vm) +{ + cnat_ipv4_icmp_q_inside_input_main_t * mp = &cnat_ipv4_icmp_q_inside_input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_icmp_q_inside_input_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input_exception.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input_exception.c new file mode 100644 index 00000000000..9b5e280e571 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input_exception.c @@ -0,0 +1,235 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_icmp_query_inside_input_exception.c - cnat_ipv4_icmp_query_inside_input_exception node pipeline stage functions + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
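Editor's note: CNAT_DB_TIMEOUT_RST above refreshes the idle timer on whichever entry actually matched (the per-destination session entry if one exists, otherwise the main entry). Its definition lives in the cnat_db headers and is not shown in this patch; based on the explicit "entry_expires = cnat_current_time" assignments that appear in the TCP exception node later in this series, a plausible shape is sketched below purely as an assumption:

/* ASSUMPTION: illustrative stand-in, not the real CNAT_DB_TIMEOUT_RST. */
#define DEMO_DB_TIMEOUT_RST(entry)                          \
    do { (entry)->entry_expires = cnat_current_time; } while (0)

The effect either way is that the periodic scanner measures idleness from the last translated packet rather than from entry creation.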
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_ipv4_icmp.h" + +#define foreach_cnat_ipv4_icmp_q_inside_input_exc_error \ +_(CNAT_V4_ICMP_Q_I2O_E_T_PKT, "v4 icmp query i2o-e transmit") \ +_(CNAT_V4_ICMP_Q_I2O_E_G_PKT, "v4 icmp query i2o-e gen icmp msg") \ +_(CNAT_V4_ICMP_Q_I2O_E_D_PKT, "v4 icmp query i2o-e pkt drop") \ +_(CNAT_V4_ICMP_Q_I2O_E_DC_PKT, "v4 icmp query i2o-e drop (no config)") \ +_(CNAT_V4_ICMP_Q_I2O_E_DR_PKT, "v4 icmp query i2o-e drop (not in run state)") \ +_(CNAT_V4_ICMP_Q_I2O_E_DD_PKT, "v4 icmp query i2o-e drop (no direct port)") \ +_(CNAT_V4_ICMP_Q_I2O_E_DA_PKT, "v4 icmp query i2o-e drop (no any port)") \ +_(CNAT_V4_ICMP_Q_I2O_E_DO_PKT, "v4 icmp query i2o-e drop (out of port limit)") \ +_(CNAT_V4_ICMP_Q_I2O_E_DS_PKT, "v4 icmp query i2o_e drop (out of session db)") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_icmp_q_inside_input_exc_error +#undef _ + CNAT_IPV4_ICMP_Q_INSIDE_INPUT_EXCEPTIONS_N_ERROR, +} cnat_ipv4_icmp_q_inside_input_exc_error_t; + + +static char * cnat_ipv4_icmp_q_inside_input_exc_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_icmp_q_inside_input_exc_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_icmp_q_inside_input_exc_main_t; + +typedef enum { + CNAT_V4_ICMP_Q_E_I2O_T, + //CNAT_V4_ICMP_Q_E_I2O_GEN, + CNAT_V4_ICMP_Q_E_I2O_D, + CNAT_V4_ICMP_Q_E_I2O_NEXT, +} cnat_ipv4_icmp_q_inside_input_exc_next_t; + +#define CNAT_V4_ICMP_Q_E_I2O_GEN CNAT_V4_ICMP_Q_E_I2O_T + +cnat_ipv4_icmp_q_inside_input_exc_main_t cnat_ipv4_icmp_q_inside_input_exc_main; +vlib_node_registration_t cnat_ipv4_icmp_q_inside_input_exc_node; + +#define NSTAGES 2 + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. 
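Editor's note: every node in this series builds its error counters with the same X-macro idiom: one foreach list is expanded once into an enum of counter indices and once into the matching string table handed to VLIB_REGISTER_NODE, so the two can never drift apart. A stripped-down illustration with hypothetical names:

#define foreach_demo_error              \
_(DEMO_PKT_TRANSMIT, "packet transmit") \
_(DEMO_PKT_DROP,     "packet drop")

typedef enum {
#define _(sym,str) sym,
    foreach_demo_error
#undef _
    DEMO_N_ERROR,
} demo_error_t;

static char *demo_error_strings[] = {
#define _(sym,str) str,
    foreach_demo_error
#undef _
};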
+ */ +#define stage0 generic_stage0 + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + int disposition = CNAT_V4_ICMP_Q_E_I2O_T; + int counter = CNAT_V4_ICMP_Q_I2O_E_T_PKT; + + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_icmp_q_inside_input_exc_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + cnat_key_t dest_info; + cnat_gen_icmp_info info; + cnat_db_key_bucket_t ki; + cnat_main_db_entry_t *db = NULL; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + ki.k.k.vrf, CNAT_ICMP) + + ki.k.k.ipv4 = + clib_net_to_host_u32(ip->src_addr); + ki.k.k.port = + clib_net_to_host_u16(icmp->identifier); + + dest_info.k.port = 0; + dest_info.k.ipv4 = clib_net_to_host_u32(ip->dest_addr); + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + dest_info.k.vrf, CNAT_ICMP) + + db = cnat_get_main_db_entry_v2(&ki, PORT_SINGLE, PORT_TYPE_DYNAMIC, + &info, &dest_info); + if (PREDICT_TRUE(db != 0)) { + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT BEFORE\n"); + print_icmp_pkt(ip); + } + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + /* + * step 6 do nat before fwd pkt + */ + swap_ip_src_icmp_id(ip, icmp, db, db->in2out_key.k.vrf); + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT AFTER\n"); + print_icmp_pkt(ip); + } + + /* + * update db for this pkt + */ + CNAT_DB_UPDATE_IN2OUT_TIMER + in2out_forwarding_count++; + + } else { + switch (info.error) { + case (CNAT_NO_VRF_RUN): + counter = CNAT_V4_ICMP_Q_I2O_E_DR_PKT; + break; + case (CNAT_OUT_LIMIT): + counter = CNAT_V4_ICMP_Q_I2O_E_DO_PKT; + break; + case (CNAT_NO_PORT_ANY): + case (CNAT_NO_POOL_ANY): + case (CNAT_BAD_INUSE_ANY): + case (CNAT_NOT_FOUND_ANY): + counter = CNAT_V4_ICMP_Q_I2O_E_DA_PKT; + break; + case (CNAT_INV_PORT_DIRECT): + case (CNAT_DEL_PORT_DIRECT): + case (CNAT_BAD_INUSE_DIRECT): + case (CNAT_NOT_FOUND_DIRECT): + counter = CNAT_V4_ICMP_Q_I2O_E_DD_PKT; + break; + case (CNAT_ERR_NO_SESSION_DB): + counter = CNAT_V4_ICMP_Q_I2O_E_DS_PKT; + break; + default: + counter = CNAT_V4_ICMP_Q_I2O_E_DC_PKT; + break; + } + /* + * send to icmp msg generate node + */ + if (info.gen_icmp_msg == CNAT_ICMP_MSG) { + #if 0 + u32 *fd = (u32*)ctx->feature_data; + fd[0] = info.svi_addr; + fd[1] = CNAT_ICMP_DEST_UNREACHABLE; + #endif + disposition = CNAT_V4_ICMP_Q_E_I2O_GEN; + counter = CNAT_V4_ICMP_Q_I2O_E_G_PKT; + } else { + disposition = CNAT_V4_ICMP_Q_E_I2O_D; + counter = CNAT_V4_ICMP_Q_I2O_E_D_PKT; + } + DEBUG_I2O_DROP(CNAT_DEBUG_DROP_ICMP) + } + + em->counters[node_counter_base_index + counter] += 1; + + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_icmp_q_inside_input_exc_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + +VLIB_REGISTER_NODE (cnat_ipv4_icmp_q_inside_input_exc_node) = { + .function = cnat_ipv4_icmp_q_inside_input_exc_node_fn, + .name = "vcgn-v4-icmp-q-i2o-e", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_icmp_q_inside_input_exc_error_strings), + .error_strings = 
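Editor's note: the comment above says ICMP error generation is rate limited per private user so a subscriber who has exhausted the port limit does not get flooded with unreachables. The actual policy lives inside icmpv4_generate_with_throttling and related helpers, none of which are shown in this patch; the snippet below is only an illustration of one common way such per-user throttling can be expressed, not the implementation used here:

#include <stdint.h>

typedef struct {
    uint32_t window_start;     /* seconds */
    uint32_t sent_in_window;
} demo_icmp_throttle_t;

/* Return nonzero if an ICMP error may be generated for this user now,
 * allowing at most max_per_sec messages per one-second window. */
static int
demo_icmp_allowed (demo_icmp_throttle_t *t, uint32_t now, uint32_t max_per_sec)
{
    if (now != t->window_start) {
        t->window_start = now;
        t->sent_in_window = 0;
    }
    if (t->sent_in_window >= max_per_sec)
        return 0;
    t->sent_in_window++;
    return 1;
}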
cnat_ipv4_icmp_q_inside_input_exc_error_strings, + + .n_next_nodes = CNAT_V4_ICMP_Q_E_I2O_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + //[CNAT_V4_ICMP_Q_E_I2O_GEN] = "icmp_msg_gen", /* Currently it will go + //to ip4-input node. We have to port icmp msg generator node */ + [CNAT_V4_ICMP_Q_E_I2O_T] = "ip4-input", + [CNAT_V4_ICMP_Q_E_I2O_D] = "error-drop", + }, +}; + + +clib_error_t *cnat_ipv4_icmp_q_inside_input_exc_init (vlib_main_t *vm) +{ + cnat_ipv4_icmp_q_inside_input_exc_main_t * mp = &cnat_ipv4_icmp_q_inside_input_exc_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_icmp_q_inside_input_exc_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c new file mode 100644 index 00000000000..2c05e0b400e --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c @@ -0,0 +1,381 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_icmp_query_outside_input.c - cnat_ipv4_icmp_query_outside_input node pipeline stage functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_ipv4_icmp.h" + +#define foreach_cnat_ipv4_icmp_q_outside_input_error \ +_(CNAT_V4_ICMP_Q_O2I_T_PKT, "cnat v4 icmp_q o2i packet transmit") \ +_(CNAT_V4_ICMP_Q_O2I_MISS_PKT, "cnat v4 icmp_q o2i drop") \ +_(CNAT_V4_ICMP_Q_O2I_TTL_GEN, "cnat v4 icmp_q o2i ttl generate") \ +_(CNAT_V4_ICMP_Q_O2I_TTL_DROP, "cnat v4 icmp_q o2i ttl drop") \ +_(CNAT_V4_ICMP_Q_O2I_NO_SESSION_DROP, "cnat v4 icmp_q o2i no session drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_icmp_q_outside_input_error +#undef _ + CNAT_IPV4_ICMP_Q_OUTSIDE_INPUT_N_ERROR, +} cnat_ipv4_icmp_q_outside_input_t; + +static char * cnat_ipv4_icmp_q_outside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_icmp_q_outside_input_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_icmp_q_outside_input_main_t; + +typedef enum { + CNAT_V4_ICMP_Q_O2I_T, + CNAT_V4_ICMP_Q_O2I_D, + CNAT_V4_ICMP_Q_O2I_NEXT, +} cnat_ipv4_icmp_q_outside_input_next_t; + +cnat_ipv4_icmp_q_outside_input_main_t cnat_ipv4_icmp_q_outside_input_main; +vlib_node_registration_t cnat_ipv4_icmp_q_outside_input_node; + +#define NSTAGES 5 + +inline void swap_ip_dst_icmp_id(ipv4_header *ip, + icmp_v4_t *icmp, + cnat_main_db_entry_t *db, u16 vrf) +{ +#if 0 + u32 postmap_ip; + u8 direction; + u32 old_ip; + u32 old_postmap_ip; + + if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) { + direction = 1; + if(cnat_static_dest_db_get_translation(ip->src_addr, 
&postmap_ip, vrf, direction) == CNAT_SUCCESS) { + CNAT_UPDATE_L3_CHECKSUM_DECLARE + + old_ip = spp_net_to_host_byte_order_32(&(ip->src_addr)); + old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (spp_net_to_host_byte_order_16(&(ip->checksum))), + ((u16)(old_postmap_ip & 0xFFFF)), + ((u16)(old_postmap_ip >> 16))) + ip->checksum = + spp_host_to_net_byte_order_16(new_l3_c); + ip->src_addr = postmap_ip; + } + } +#endif /* if 0 */ + /* + * declare variable + */ + CNAT_UPDATE_L3_L4_CHECKSUM_DECLARE + /* + * calculate checksum + */ + CNAT_UPDATE_L3_ICMP_CHECKSUM(((u16)(db->out2in_key.k.ipv4)), + ((u16)(db->out2in_key.k.ipv4 >> 16)), + (db->out2in_key.k.port), + (clib_net_to_host_u16(ip->checksum)), + (clib_net_to_host_u16(icmp->checksum)), + ((u16)(db->in2out_key.k.ipv4)), + ((u16)(db->in2out_key.k.ipv4 >> 16)), + (db->in2out_key.k.port)) + //set ip header + ip->dest_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + ip->checksum = + clib_host_to_net_u16(new_l3_c); + + //set icmp header + icmp->identifier = + clib_host_to_net_u16(db->in2out_key.k.port); + icmp->checksum = + clib_host_to_net_u16(new_l4_c); +} + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. + */ +#define stage0 generic_stage0 + +static inline void +stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + u64 a, b, c; + u32 bucket; + u8 *prefetch_target; + + vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index); + ipv4_header *ip = vlib_buffer_get_current (b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + + u64 tmp = 0; + tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = + clib_net_to_host_u32(ip->dest_addr); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16 (icmp->identifier); + + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + CNAT_ICMP) + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48; + + CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_out2in_hash[bucket]); + vnet_buffer(b0)->vcgn_uii.bucket = bucket; + + /* Prefetch the hash bucket */ + CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD); +} + +#define SPP_LOG2_CACHE_LINE_BYTES 6 +#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES) + +static inline void +stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + uword prefetch_target0, prefetch_target1; + u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket; + + /* read the hash bucket */ + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket + = cnat_out2in_hash[bucket].next; + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. 
+ */ + prefetch_target0 = (uword)(cnat_main_db + db_index); + CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD); + /* Just beyond DB key #2 */ + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + /* If the targets are in different lines, do the second prefetch */ + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD); + } + } +} + +static inline void +stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + cnat_main_db_entry_t *db; + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. After that, maybe + * not so much... + */ + do { + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->out2in_key.key64 == + vnet_buffer(b0)->vcgn_uii.key.key64)) { + break; + } + db_index = db->out2in_hash.next; + } while (db_index != EMPTY); + + /* Stick the answer back into the pipeline context structure */ + vnet_buffer(b0)->vcgn_uii.bucket = db_index; + } +} + + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + int disposition = CNAT_V4_ICMP_Q_O2I_T; + int counter = CNAT_V4_ICMP_Q_O2I_T_PKT; + + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len); + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_icmp_q_outside_input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + cnat_session_entry_t *session_db = NULL; + cnat_main_db_entry_t *db = NULL; + cnat_key_t dest_info; + cnat_vrfmap_t * vrf_map_p __attribute__((unused)) = NULL; + u32 vrf_index __attribute__((unused)) = 0; + + if (PREDICT_TRUE(db_index != EMPTY)) { + + db = cnat_main_db + db_index; + dest_info.k.port = 0; + dest_info.k.ipv4 = clib_net_to_host_u32(ip->src_addr); + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT BEFORE\n"); + print_icmp_pkt(ip); + } + + vrf_map_p = cnat_map_by_vrf + db->vrfmap_index; + vrf_index = (db->in2out_key.k.vrf & CNAT_VRF_MASK); + + if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) { + + /* No DBL support, so just update the destn and proceed */ + db->dst_ipv4 = dest_info.k.ipv4; + db->dst_port = dest_info.k.port; + goto update_pkt; + } + + if(PREDICT_FALSE(db->dst_ipv4 != dest_info.k.ipv4)) { + + if(PREDICT_TRUE(db->nsessions == 1)) { + /* Handle one to 2 dest scenarion */ + dest_info.k.vrf = db->in2out_key.k.vrf; + session_db = cnat_handle_1to2_session(db, &dest_info); + + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_ICMP_Q_O2I_D; + counter = CNAT_V4_ICMP_Q_O2I_NO_SESSION_DROP; + goto drop_pkt; + } + } else if (PREDICT_FALSE(db->nsessions == 0)) { + /* Should be a static entry + * Note this session as the first session and log + */ + cnat_add_dest_n_log(db, &dest_info); + } else { /* Many translations exist already */ + dest_info.k.vrf = db->in2out_key.k.vrf; + /* If session already exists, + * 
cnat_create_session_db_entry will return the existing db + * else create a new db + * If could not create, return NULL + */ + session_db = cnat_create_session_db_entry(&dest_info, + db, TRUE); + + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_ICMP_Q_O2I_D; + counter = CNAT_V4_ICMP_Q_O2I_NO_SESSION_DROP; + goto drop_pkt; + } + } + } + +update_pkt: + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + /* + * 1. update dest ipv4 addr and icmp id + * 2. update ipv4 checksum and icmp checksum + */ + swap_ip_dst_icmp_id(ip, icmp, db, db->in2out_key.k.vrf); + + if (PREDICT_FALSE(icmp_debug_flag)) { + printf("\nDUMPING ICMP PKT AFTER\n"); + print_icmp_pkt(ip); + } + + db->out2in_pkts++; + + //nat44_dslite_global_stats[dslite_flag].out2in_forwarding_count++; + + } else { + disposition = CNAT_V4_ICMP_Q_O2I_D; + counter = CNAT_V4_ICMP_Q_O2I_MISS_PKT; + } + +drop_pkt: + em->counters[node_counter_base_index + counter] += 1; + return disposition; + +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_icmp_q_outside_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (cnat_ipv4_icmp_q_outside_input_node) = { + .function = cnat_ipv4_icmp_q_outside_input_node_fn, + .name = "vcgn-v4-icmp-q-o2i", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_icmp_q_outside_input_error_strings), + .error_strings = cnat_ipv4_icmp_q_outside_input_error_strings, + + .n_next_nodes = CNAT_V4_ICMP_Q_O2I_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_V4_ICMP_Q_O2I_T] = "ip4-input", + [CNAT_V4_ICMP_Q_O2I_D] = "error-drop", + }, +}; + +clib_error_t *cnat_ipv4_icmp_q_outside_input_init (vlib_main_t *vm) +{ + cnat_ipv4_icmp_q_outside_input_main_t * mp = &cnat_ipv4_icmp_q_outside_input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_icmp_q_outside_input_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_tcp_inside_input.c b/vnet/vnet/vcgn/cnat_ipv4_tcp_inside_input.c new file mode 100644 index 00000000000..5bea707385c --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_tcp_inside_input.c @@ -0,0 +1,424 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_tcp_inside_input.c - cnat_ipv4_tcp_inside_input node pipeline + * stage functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
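Editor's note: the destination-tracking policy above (and in the other query/TCP nodes in this series) always follows the same three-way split on db->nsessions. Condensed into one helper for readability; the function and type names match the ones used in these files, but the wrapper itself is illustrative and assumes the cnat_db.h declarations:

/* Sketch only: how these nodes decide whether a per-destination
 * session entry is needed when the destination differs from db->dst_ipv4. */
static cnat_session_entry_t *
demo_track_destination (cnat_main_db_entry_t *db, cnat_key_t *dest_info)
{
    if (db->nsessions == 0) {
        /* static mapping seeing its first destination: record and log it */
        cnat_add_dest_n_log (db, dest_info);
        return NULL;
    }

    dest_info->k.vrf = db->in2out_key.k.vrf;

    if (db->nsessions == 1)
        /* a second distinct destination: split into per-session entries */
        return cnat_handle_1to2_session (db, dest_info);

    /* several destinations already: find or create the matching session entry */
    return cnat_create_session_db_entry (dest_info, db, TRUE);
}

When a session entry was required but NULL comes back, the callers above drop the packet and bump the corresponding *_NO_SESSION_DROP counter.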
+ *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_db.h" +#include "tcp_header_definitions.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_v4_functions.h" + +#define foreach_cnat_ipv4_tcp_inside_input_error \ +_(CNAT_V4_TCP_I2O_PKT_IN, "tcp i2o packets received") \ +_(CNAT_V4_TCP_I2O_PKT_T, "tcp i2o packets natted") \ +_(CNAT_V4_TCP_I2O_EXCEPTION, "packets to tcp i2o exception") \ +_(CNAT_V4_TCP_I2O_TTL_GEN, "generated TTL expiry ICMP packets") \ +_(CNAT_V4_TCP_I2O_TTL_GEN_DROP, "could not generate TTL expiry ICMP packets") \ +_(CNAT_V4_TCP_I2O_SESSION_DROP, "could not generate session") \ +_(CNAT_V4_UDP_I2O_FRAG_DROP, "non-first fragment drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_tcp_inside_input_error +#undef _ + CNAT_IPV4_TCP_INSIDE_INPUT_N_ERROR, +} cnat_ipv4_tcp_inside_input_t; + +static char * cnat_ipv4_tcp_inside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_tcp_inside_input_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_tcp_inside_input_main_t; + +typedef enum { + CNAT_V4_TCP_I2O_E, + CNAT_V4_TCP_I2O_T, + CNAT_V4_TCP_I2O_D, + CNAT_V4_TCP_I2O_NEXT, +} cnat_ipv4_tcp_inside_input_next_t; + +#define CNAT_REWRITE_OUTPUT CNAT_V4_TCP_I2O_T +#define CNAT_V4_ICMP_GEN CNAT_V4_TCP_I2O_D + +//#define CNAT_V4_TCP_I2O_E CNAT_V4_TCP_I2O_D //remove it once exception node is created +cnat_ipv4_tcp_inside_input_main_t cnat_ipv4_tcp_inside_input_main; +vlib_node_registration_t cnat_ipv4_tcp_inside_input_node; + +#define NSTAGES 6 + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. 
+ */ +#define stage0 generic_stage0 + + +static inline void +stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + u64 a, b, c; + u32 bucket; + u8 *prefetch_target; + //cnat_feature_data_t *fd = (cnat_feature_data_t *)ctx->feature_data; + + + vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index); + ipv4_header *ip = vlib_buffer_get_current (b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + tcp_hdr_type *tcp = (tcp_hdr_type *)((u8*)ip + ipv4_hdr_len); + + u64 tmp = 0; + tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = + clib_net_to_host_u32(ip->src_addr); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16 (tcp->src_port); + + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + CNAT_TCP) + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48; + + CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_in2out_hash[bucket]); + vnet_buffer(b0)->vcgn_uii.bucket = bucket; + + /* Prefetch the hash bucket */ + CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static inline void +stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ /* nothing */ } + + +#define SPP_LOG2_CACHE_LINE_BYTES 6 +#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES) + +static inline void +stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + uword prefetch_target0, prefetch_target1; + u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket; + + /* read the hash bucket */ + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket + = cnat_in2out_hash[bucket].next; + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. + */ + prefetch_target0 = (uword)(cnat_main_db + db_index); + CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD); + /* Just beyond DB key #2 */ + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + /* If the targets are in different lines, do the second prefetch */ + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD); + } + } +} + +static inline void +stage4(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + cnat_main_db_entry_t *db; + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. After that, maybe + * not so much... 
+ */ + do { + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->in2out_key.key64 == + vnet_buffer(b0)->vcgn_uii.key.key64)) { + break; + } + db_index = db->in2out_hash.next; + } while (db_index != EMPTY); + + /* Stick the answer back into the pipeline context structure */ + vnet_buffer(b0)->vcgn_uii.bucket = db_index; + } +} + + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + spp_ctx_t *ctx = (spp_ctx_t *) &vnet_buffer(b0)->vcgn_uii; + int disposition = CNAT_V4_TCP_I2O_T; + int counter = CNAT_V4_TCP_I2O_PKT_T; + + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + tcp_hdr_type *tcp = (tcp_hdr_type *)((u8*)ip + ipv4_hdr_len); + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_tcp_inside_input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + cnat_session_entry_t *session_db = NULL; + cnat_main_db_entry_t *db = NULL; + cnat_key_t dest_info; + u32 window; + u8 scale; + + + INCREMENT_NODE_COUNTER(CNAT_V4_TCP_I2O_PKT_IN); + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + if (PREDICT_FALSE(ip->ttl <= 1)) { + /* Try to generate ICMP error msg, as TTL is <= 1 */ + + if (icmpv4_generate_with_throttling + (ctx, ip, ctx->ru.rx.uidb_index)) { + + /* Generated ICMP */ + disposition = CNAT_REWRITE_OUTPUT; + counter = CNAT_V4_TCP_I2O_TTL_GEN; + } else { + /* Could not generated ICMP - drop the packet */ + disposition = CNAT_V4_TCP_I2O_D; + counter = CNAT_V4_TCP_I2O_TTL_GEN_DROP; + } + goto drop_pkt; + } + } + + if (PREDICT_FALSE(db_index == EMPTY)) { + /* Deleted fragment code from here */ + disposition = CNAT_V4_TCP_I2O_E; + counter = CNAT_V4_TCP_I2O_EXCEPTION; + } else { + db = cnat_main_db + db_index; + + /* Handle destination sessions */ + dest_info.k.port = clib_net_to_host_u16(tcp->dest_port); + dest_info.k.ipv4 = clib_net_to_host_u32(ip->dest_addr); + + if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) { + + /* No DBL support, so just update the destn and proceed */ + db->dst_ipv4 = dest_info.k.ipv4; + db->dst_port = dest_info.k.port; + goto update_pkt; + } + + if(PREDICT_FALSE(db->dst_ipv4 != dest_info.k.ipv4 || + db->dst_port != dest_info.k.port)) { + if(PREDICT_TRUE(db->nsessions == 0)) { + /* Should be a static entry + * Note this session as the first session and log + */ + cnat_add_dest_n_log(db, &dest_info); + } else if(PREDICT_FALSE(db->nsessions == 1)) { + /* Destn is not same as in main db. 
Multiple session + * scenario + */ + dest_info.k.vrf = db->in2out_key.k.vrf; + session_db = cnat_handle_1to2_session(db, &dest_info); + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_TCP_I2O_D; + counter = CNAT_V4_TCP_I2O_SESSION_DROP; + goto drop_pkt; + } + } else { /* There are already multiple destinations */ + dest_info.k.vrf = db->in2out_key.k.vrf; + /* If session already exists, + * cnat_create_session_db_entry will return the existing db + * else create a new db + * If could not create, return NULL + */ + session_db = cnat_create_session_db_entry(&dest_info, + db, TRUE); + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_TCP_I2O_D; + counter = CNAT_V4_TCP_I2O_SESSION_DROP; + goto drop_pkt; + } + } + if(PREDICT_TRUE(session_db != 0)) { + /* Have to repeat the window size check for new destinations */ + window = (u32)clib_net_to_host_u16(tcp->window_size); + window = window << session_db->scale; + if(PREDICT_TRUE(!session_db->window)) { + calculate_window_scale(tcp, &scale); + session_db->scale = scale; + session_db->window = window; + } else if (PREDICT_FALSE(session_db->window < + window)) { + /* Update the db entry with window option from packet */ + session_db->window = window; + } else { + /* Do nothing */ + } + session_db->tcp_seq_num = clib_net_to_host_u32(tcp->seq_num); + session_db->ack_no = clib_net_to_host_u32(tcp->ack_num); +#if DEBUG > 1 + printf("\n In2out SDB stages seq no = %u," + " ack no = %u, window = %u\n", + session_db->tcp_seq_num, + session_db->ack_no, + session_db->window); +#endif + + } + } else { + //Update the seq no and ack no for subsequent communication + //after connection establishment + //No need to update window here. Window is already updated + //during connection establishment + window = (u32)clib_net_to_host_u16(tcp->window_size); + window = window << db->scale; + if(PREDICT_FALSE(!ALG_ENABLED_DB(db))) { + //This check is done since proto_data is part of union in main + //db entry + db->proto_data.tcp_seq_chk.seq_no = + clib_net_to_host_u32(tcp->seq_num); + db->proto_data.tcp_seq_chk.ack_no = + clib_net_to_host_u32(tcp->ack_num); + } + if (PREDICT_FALSE(db->diff_window < window)) { + /* Update the db entry with window option from packet */ + db->diff_window = window; + } +#if DEBUG > 1 + printf("\n In2out MainDB seq no = %u," + "\n ack no = %u\n", + db->proto_data.tcp_seq_chk.seq_no, + db->proto_data.tcp_seq_chk.ack_no); + printf("\n In2out MAINDB window = %u\n", + db->diff_window); +#endif + } +update_pkt: + + counter = CNAT_V4_TCP_I2O_PKT_T; + disposition = CNAT_V4_TCP_I2O_T; + + /* NO FRAGMENT & ALG HANDLING. 
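Editor's note: the window bookkeeping above multiplies the advertised 16-bit window by 2^scale, where the scale factor comes from calculate_window_scale on the SYN. That helper is defined elsewhere in the vcgn sources; the sketch below only shows the standard option-walk such a helper has to perform (TCP window-scale option, kind 3, RFC 7323), and may differ from the real implementation in detail:

#include <stdint.h>
#include <stddef.h>

/* Return the window-scale shift advertised in a SYN's TCP options,
 * or 0 if the option is absent. opts points at the options area,
 * opts_len is (data_offset * 4) - 20. */
static uint8_t
demo_tcp_window_scale (const uint8_t *opts, size_t opts_len)
{
    size_t i = 0;
    while (i < opts_len) {
        uint8_t kind = opts[i];
        if (kind == 0)                    /* end of option list */
            break;
        if (kind == 1) {                  /* NOP padding */
            i++;
            continue;
        }
        if (i + 1 >= opts_len)
            break;
        uint8_t len = opts[i + 1];
        if (len < 2 || i + len > opts_len)
            break;                        /* malformed options */
        if (kind == 3 && len == 3)
            return opts[i + 2] > 14 ? 14 : opts[i + 2];   /* RFC cap at 14 */
        i += len;
    }
    return 0;
}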
DELETING THE CODE */ + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + tcp_in2out_nat_mss_n_checksum(ip, + tcp, + db->out2in_key.k.ipv4, + db->out2in_key.k.port, + db + /*, db->in2out_key.k.vrf */); + + /* update transaltion counters */ + db->in2out_pkts++; + in2out_forwarding_count++; + + /* update the timer for good mode, or evil mode dst_ip match */ + + if(PREDICT_FALSE(session_db != NULL)) { + V4_TCP_UPDATE_SESSION_DB_FLAG(session_db, tcp); + CNAT_DB_TIMEOUT_RST(session_db); + } else { + V4_TCP_UPDATE_SESSION_FLAG(db, tcp); + CNAT_DB_TIMEOUT_RST(db); + } + } + +drop_pkt: + + em->counters[node_counter_base_index + counter] += 1; + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_tcp_inside_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (cnat_ipv4_tcp_inside_input_node) = { + .function = cnat_ipv4_tcp_inside_input_node_fn, + .name = "vcgn-v4-tcp-i2o", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_tcp_inside_input_error_strings), + .error_strings = cnat_ipv4_tcp_inside_input_error_strings, + + .n_next_nodes = CNAT_V4_TCP_I2O_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_V4_TCP_I2O_E] = "vcgn-v4-tcp-i2o-e", + [CNAT_V4_TCP_I2O_T] = "ip4-input", + [CNAT_V4_TCP_I2O_D] = "error-drop", + }, +}; + +clib_error_t *cnat_ipv4_tcp_inside_input_init (vlib_main_t *vm) +{ + cnat_ipv4_tcp_inside_input_main_t * mp = &cnat_ipv4_tcp_inside_input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_tcp_inside_input_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_tcp_inside_input_exceptions.c b/vnet/vnet/vcgn/cnat_ipv4_tcp_inside_input_exceptions.c new file mode 100644 index 00000000000..bc1bebb04ba --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_tcp_inside_input_exceptions.c @@ -0,0 +1,314 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_tcp_inside_input_exceptions.c - + * cnat_ipv4_tcp_inside_input_exceptions node pipeline stage functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
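Editor's note: tcp_in2out_nat_mss_n_checksum, called above with the mapped (out2in) address and port, has to fix the TCP checksum as well as the IP one, because the TCP checksum covers a pseudo-header containing both addresses; the name suggests it also clamps the MSS option on SYNs, but only the checksum arithmetic is sketched here. Hypothetical helper names, 16-bit values taken as wire words in big-endian order:

#include <stdint.h>

/* HC' = ~(~HC + ~old_w + new_w), with end-around carry folds. */
static uint16_t
csum_replace16 (uint16_t hc, uint16_t old_w, uint16_t new_w)
{
    uint32_t sum = (uint16_t) ~hc;
    sum += (uint16_t) ~old_w;
    sum += new_w;
    sum = (sum & 0xffff) + (sum >> 16);
    sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t) ~sum;
}

/* An i2o rewrite of (src addr, src port) changes three 16-bit words
 * covered by the TCP checksum: two via the pseudo-header address and
 * one via the port field. */
static uint16_t
tcp_csum_after_src_rewrite (uint16_t tcp_csum,
                            uint32_t old_ip, uint32_t new_ip,
                            uint16_t old_port, uint16_t new_port)
{
    tcp_csum = csum_replace16 (tcp_csum, (uint16_t) (old_ip >> 16), (uint16_t) (new_ip >> 16));
    tcp_csum = csum_replace16 (tcp_csum, (uint16_t) old_ip, (uint16_t) new_ip);
    tcp_csum = csum_replace16 (tcp_csum, old_port, new_port);
    return tcp_csum;
}

The IP header checksum is patched separately (it does not cover the port), which is why the address shows up in both updates.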
+ *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_db.h" +#include "tcp_header_definitions.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_v4_functions.h" + + +#define foreach_cnat_ipv4_tcp_inside_input_exc_error \ +_(CNAT_V4_TCP_I2O_E_T_PKT, "v4 tcp i2o-e transmit natted pkt") \ +_(CNAT_V4_TCP_I2O_E_D_NON_SYN_PKT, "v4 tcp i2o-e non syn drop") \ +_(CNAT_V4_TCP_I2O_E_D_INVALID_PKT, "v4 tcp i2o-e invalid pkt drop") \ +_(CNAT_V4_TCP_I2O_E_DROP, "v4 tcp i2o-e drop") \ +_(CNAT_V4_TCP_I2O_E_GEN_ICMP, "v4 tcp i2o-e gen icmp msg") \ +_(CNAT_V4_TCP_I2O_E_D_NO_SESSION, "v4 tcp i2o-e no session db entry drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_tcp_inside_input_exc_error +#undef _ + CNAT_IPV4_TCP_INSIDE_INPUT_EXCEPTIONS_N_ERROR, +} cnat_ipv4_tcp_inside_input_exc_error_t; + + +static char * cnat_ipv4_tcp_inside_input_exc_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_tcp_inside_input_exc_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_tcp_inside_input_exc_main_t; + +typedef enum { + CNAT_V4_TCP_I2O_E_T, + //CNAT_V4_TCP_I2O_E_ICMP, + CNAT_V4_TCP_I2O_E_D, + CNAT_V4_TCP_I2O_E_NEXT, +} cnat_ipv4_udp_inside_input_exc_next_t; + +#define CNAT_V4_TCP_I2O_E_ICMP CNAT_V4_TCP_I2O_E_D + +cnat_ipv4_tcp_inside_input_exc_main_t cnat_ipv4_tcp_inside_input_exc_main; +vlib_node_registration_t cnat_ipv4_tcp_inside_input_exc_node; + +#define NSTAGES 2 + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. 
+ */ +#define stage0 generic_stage0 + + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + vlib_node_t *n = + vlib_get_node (vm, cnat_ipv4_tcp_inside_input_exc_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + cnat_gen_icmp_info info; + cnat_db_key_bucket_t ki; + cnat_main_db_entry_t *db = NULL; + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + tcp_hdr_type *tcp = (tcp_hdr_type *)((u8*)ip + ipv4_hdr_len); + int disposition = CNAT_V4_TCP_I2O_E_T; + int counter = CNAT_V4_TCP_I2O_E_T_PKT; + cnat_key_t dest_info; + u32 window; + u8 scale; + + window = (u32)clib_net_to_host_u16(tcp->window_size); + calculate_window_scale(tcp, &scale); + + dest_info.k.port = clib_net_to_host_u16(tcp->dest_port); + dest_info.k.ipv4 = clib_net_to_host_u32(ip->dest_addr); + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + dest_info.k.vrf, CNAT_TCP) + + /* for TCP if not SYN or if src_port is 0, silently drop the packet */ + if (PREDICT_FALSE(!((tcp->flags & TCP_FLAG_SYN) && (tcp->src_port)))) { + + /* + * If the packet is dropped due to both reasons, + * count it as invalid packet drop + */ + if (!tcp->src_port) { + counter = CNAT_V4_TCP_I2O_E_D_INVALID_PKT; + } else { + counter = CNAT_V4_TCP_I2O_E_D_NON_SYN_PKT; + } + disposition = CNAT_V4_TCP_I2O_E_D; + goto in2out_e; + } + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + ki.k.k.vrf, CNAT_TCP) + + ki.k.k.ipv4 = clib_net_to_host_u32(ip->src_addr); + ki.k.k.port = clib_net_to_host_u16(tcp->src_port); + + db = cnat_get_main_db_entry_v2(&ki, PORT_SINGLE, PORT_TYPE_DYNAMIC, &info, + &dest_info); + + +#if DEBUG > 1 + if(PREDICT_TRUE(db)) { + printf("create db %x ip %x->%x port %x->%x dst_ip %x\n", db, + db->in2out_key.k.ipv4, db->out2in_key.k.ipv4, + db->in2out_key.k.port, db->out2in_key.k.port, db->dst_ipv4); + } +#endif + + + if (PREDICT_FALSE(db == 0)) { + /* failed to create new db entry due to either no more port, or user limit reached, + * need to generate ICMP type=3,code=13 msg here, + */ + + /* + * we rate limit the icmp msg per private user, + * so we don't flood a user with icmp msg + * in case the per user port limit reached + */ + if (PREDICT_TRUE(info.gen_icmp_msg == CNAT_ICMP_MSG)) { + /* KEEPING THINGS COMMENTED HERE..MAY NEED TO REVISIT AGAIN */ + #if 0 + u32 *fd = (u32*)ctx->feature_data; + fd[0] = info.svi_addr; + fd[1] = CNAT_ICMP_DEST_UNREACHABLE; + + /* + * Let's reverse the direction from i2o to o2i. 
+ * This will help using the correct VRF in the fib lookup (AVSM) + * especially for the o2i_vrf_override case + */ + ctx->ru.rx.direction = 0; // 0 - o2i, 1 - i2o + #endif + disposition = CNAT_V4_TCP_I2O_E_ICMP; + counter = CNAT_V4_TCP_I2O_E_GEN_ICMP; + + } else { + disposition = CNAT_V4_TCP_I2O_E_D; + counter = CNAT_V4_TCP_I2O_E_DROP; + } + //DEBUG_I2O_DROP(CNAT_DEBUG_DROP_TCP) + } else { + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + /* NAT the packet and fix checksum */ + + tcp_in2out_nat_mss_n_checksum(ip, + tcp, + db->out2in_key.k.ipv4, + db->out2in_key.k.port, + db + /*, db->in2out_key.k.vrf */); + + /* this must be inside to outside SYN, do mss here */ + + /* update translation counters */ + db->in2out_pkts++; + + /* set keepalive timer */ + + if(PREDICT_TRUE((dest_info.k.ipv4 == db->dst_ipv4) && + (dest_info.k.port == db->dst_port))) { + if(PREDICT_FALSE(!ALG_ENABLED_DB(db))) { + //This check is done since proto_data is part of union in main + //db entry + + db->proto_data.tcp_seq_chk.seq_no = + clib_net_to_host_u32(tcp->seq_num); + db->proto_data.tcp_seq_chk.ack_no = + clib_net_to_host_u32(tcp->ack_num); + db->scale = scale; + db->diff_window = window; + } +#if DEBUG > 1 + PLATFORM_DEBUG_PRINT("\nMain DB seq no = %u," + "ack no = %u, window = %u," + "scale = %u", + db->proto_data.tcp_seq_chk.seq_no, + db->proto_data.tcp_seq_chk.ack_no, + db->diff_window + db->scale); +#endif + V4_TCP_UPDATE_SESSION_FLAG(db, tcp); + /* Check timeout db if there is config for this */ + (void) query_and_update_db_timeout((void *)db, MAIN_DB_TYPE); + db->entry_expires = cnat_current_time; + } else { + /* Got to find out the session entry corresponding to this..*/ + cnat_session_entry_t *sdb; + sdb = cnat_session_db_lookup_entry( + &dest_info, db - cnat_main_db); + if(PREDICT_FALSE(sdb == NULL)) { + disposition = CNAT_V4_TCP_I2O_E_D; + counter = CNAT_V4_TCP_I2O_E_D_NO_SESSION; + goto in2out_e; + } + sdb->tcp_seq_num = clib_net_to_host_u32(tcp->seq_num); + sdb->ack_no = clib_net_to_host_u32(tcp->ack_num); + sdb->scale = scale; + sdb->window = window; + +#if DEBUG > 1 + PLATFORM_DEBUG_PRINT("\nSDB seq no = %u, ack no = %u, window = %u" + "\nSDB scale = %u" , + sdb->tcp_seq_num, + sdb->ack_no, + sdb->window, + sdb->scale); +#endif + V4_TCP_UPDATE_SESSION_DB_FLAG(sdb, tcp); + /* Check timeout db if there is config for this */ + (void) query_and_update_db_timeout((void *)sdb, SESSION_DB_TYPE); + sdb->entry_expires = cnat_current_time; + } + + //PLATFORM_CNAT_SET_TX_VRF(ctx,db->out2in_key.k.vrf) + + counter = CNAT_V4_TCP_I2O_E_T_PKT; + in2out_forwarding_count++; + } + +in2out_e: + + em->counters[node_counter_base_index + counter] += 1; + + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_tcp_inside_input_exc_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + +VLIB_REGISTER_NODE (cnat_ipv4_tcp_inside_input_exc_node) = { + .function = cnat_ipv4_tcp_inside_input_exc_node_fn, + .name = "vcgn-v4-tcp-i2o-e", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_tcp_inside_input_exc_error_strings), + .error_strings = cnat_ipv4_tcp_inside_input_exc_error_strings, + + .n_next_nodes = CNAT_V4_TCP_I2O_E_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_V4_TCP_I2O_E_T] = "ip4-input", + [CNAT_V4_TCP_I2O_E_D] = "error-drop", + }, +}; + + +clib_error_t 
*cnat_ipv4_tcp_inside_input_exc_init (vlib_main_t *vm) +{ + cnat_ipv4_tcp_inside_input_exc_main_t * mp = &cnat_ipv4_tcp_inside_input_exc_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_tcp_inside_input_exc_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_tcp_outside_input.c b/vnet/vnet/vcgn/cnat_ipv4_tcp_outside_input.c new file mode 100644 index 00000000000..bcf132b1dd7 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_tcp_outside_input.c @@ -0,0 +1,382 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_tcp_outside_input.c - cnat_v4_tcp_out2in node pipeline stage functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_db.h" +#include "tcp_header_definitions.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_ipv4_udp.h" +#include "cnat_v4_functions.h" + + +#define foreach_cnat_ipv4_tcp_outside_input_error \ +_(CNAT_V4_TCP_O2I_R_PKT, "v4 tcp o2i pkt received") \ +_(CNAT_V4_TCP_O2I_T_PKT, "v4 tcp o2i pkt natted & transmitted") \ +_(CNAT_V4_TCP_O2I_LOOKUP_FAILED, "v4 tcp o2i lookup failed") \ +_(CNAT_V4_TCP_O2I_TTL_GEN, "v4 tcp o2i generated TTL Expiry ICMP packet") \ +_(CNAT_V4_TCP_O2I_TTL_DROP, "v4 tcp o2i drop due to failure in creating TTL expiry ICMP msg") \ +_(CNAT_V4_TCP_O2I_PTB_GEN, "v4 tcp o2i PTB ICMP pkt generation") \ +_(CNAT_V4_UDP_O2I_PTB_DROP, "v4 tcp o2i drop due to failure in creating PTB ICMP pkt") \ +_(CNAT_V4_TCP_O2I_SESSION_DROP, "v4 tcp o2i drop due to failure in creating session db") \ +_(CNAT_V4_TCP_O2I_SEQ_MISMATCH_DROP, "v4 tcp o2i drop due to TCP sequence mismatch") \ +_(CNAT_V4_TCP_O2I_FILTER_DROP, "v4 tcp o2i drop due to endpoint filtering") \ +_(CNAT_V4_TCP_O2I_NON_SYN_RST_DROP, "v4 tcp o2i drop due no syn/rst flag") \ +_(CNAT_V4_TCP_O2I_FIRST_FRAG_DROP, "v4 tcp o2i first fragment drop") \ +_(CNAT_V4_TCP_O2I_SUB_FRAG_NO_DB_DROP, "v4 tcp o2i subsequest frag no DB drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_tcp_outside_input_error +#undef _ + CNAT_IPV4_TCP_OUTSIDE_INPUT_N_ERROR, +} cnat_ipv4_tcp_outside_input_t; + +static char * cnat_ipv4_tcp_outside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_tcp_outside_input_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_tcp_outside_input_main_t; + +typedef enum { + //CNAT_V4_TCP_O2I_E, + CNAT_V4_TCP_O2I_T, + CNAT_V4_TCP_O2I_D, + CNAT_V4_TCP_O2I_NEXT, +} cnat_ipv4_tcp_outside_input_next_t; + +cnat_ipv4_tcp_outside_input_main_t cnat_ipv4_tcp_outside_input_main; +vlib_node_registration_t cnat_ipv4_tcp_outside_input_node; + +#define 
NSTAGES 6 + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. + */ +#define stage0 generic_stage0 + + +static inline void +stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + u64 a, b, c; + u32 bucket; + u8 *prefetch_target; + + + vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index); + ipv4_header *ip = vlib_buffer_get_current (b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + tcp_hdr_type *tcp = (tcp_hdr_type *)((u8*)ip + ipv4_hdr_len); + + u64 tmp = 0; + tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = + clib_net_to_host_u32(ip->dest_addr); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16 (tcp->dest_port); + + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + CNAT_TCP) + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48; + + CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_out2in_hash[bucket]); + vnet_buffer(b0)->vcgn_uii.bucket = bucket; + + /* Prefetch the hash bucket */ + CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static inline void +stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ /* nothing */ } + +#define SPP_LOG2_CACHE_LINE_BYTES 6 +#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES) + +static inline void +stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + uword prefetch_target0, prefetch_target1; + u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket; + + /* read the hash bucket */ + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket + = cnat_out2in_hash[bucket].next; + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. + */ + prefetch_target0 = (uword)(cnat_main_db + db_index); + CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, STORE); + /* Just beyond DB key #2 */ + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + /* If the targets are in different lines, do the second prefetch */ + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, STORE); + } + } +} + +static inline void +stage4(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + cnat_main_db_entry_t *db; + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. After that, maybe + * not so much... 
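+ *
+ * The loop below walks the out2in hash collision chain, comparing the
+ * 64-bit out2in key of each entry against the key computed in stage1,
+ * and stores the matching db index (or EMPTY) back into
+ * vnet_buffer(b0)->vcgn_uii.bucket for last_stage() to use.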
+ */ + do { + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->out2in_key.key64 == + vnet_buffer(b0)->vcgn_uii.key.key64)) { + break; + } + db_index = db->out2in_hash.next; + } while (db_index != EMPTY); + + /* Stick the answer back into the pipeline context structure */ + vnet_buffer(b0)->vcgn_uii.bucket = db_index; + } +} + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + spp_ctx_t *ctx = (spp_ctx_t *) &vnet_buffer(b0)->vcgn_uii; + int disposition = CNAT_V4_TCP_O2I_T; + int counter = CNAT_V4_TCP_O2I_T_PKT; + + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + tcp_hdr_type *tcp = (tcp_hdr_type *)((u8*)ip + ipv4_hdr_len); + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_tcp_outside_input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + cnat_session_entry_t *session_db = NULL; + cnat_main_db_entry_t *db = NULL; + cnat_key_t dest_info; + + INCREMENT_NODE_COUNTER(CNAT_V4_TCP_O2I_R_PKT); + + if (PREDICT_FALSE(db_index == EMPTY)) { + nat44_dslite_common_stats[0].no_translation_entry_drops ++; + counter = CNAT_V4_TCP_O2I_LOOKUP_FAILED; + disposition = CNAT_V4_TCP_O2I_D; + } else { + if (PLATFORM_HANDLE_TTL_DECREMENT) { + if (PREDICT_FALSE(ip->ttl <= 1)) { + /* Try to generate ICMP error msg, as TTL is <= 1 */ + if (icmpv4_generate_with_throttling(ctx, + ip, ctx->ru.rx.uidb_index)) { + /* Generated ICMP */ + disposition = CNAT_V4_TCP_O2I_T_PKT; //CNAT_REWRITE_OUTPUT; + counter = CNAT_V4_TCP_O2I_TTL_GEN; + } else { + /* Could not generated ICMP - drop the packet */ + disposition = CNAT_V4_TCP_O2I_D; + counter = CNAT_V4_TCP_O2I_TTL_DROP; + } + goto drop_pkt; + } + } + db = cnat_main_db + db_index; +#if 0 + window = db->diff_window; + stored_seq_no = db->proto_data.tcp_seq_chk.seq_no; + stored_ack_no = db->proto_data.tcp_seq_chk.ack_no; + vrf_map_p = cnat_map_by_vrf + db->vrfmap_index; + vrf_index = (db->in2out_key.k.vrf & CNAT_VRF_MASK); +#endif + /* For Out2In packet, the dest info is src address and port */ + dest_info.k.port = clib_net_to_host_u16(tcp->src_port); + dest_info.k.ipv4 = clib_net_to_host_u32(ip->src_addr); + + if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) { + + /* No DBL support, so just update the destn and proceed */ + db->dst_ipv4 = dest_info.k.ipv4; + db->dst_port = dest_info.k.port; + goto update_pkt; + } + + + if(PREDICT_FALSE(db->dst_ipv4 != dest_info.k.ipv4 || + db->dst_port != dest_info.k.port)) { + + if(PREDICT_TRUE(db->nsessions == 0)) { + /* Should be a static entry + * Note this session as the first session and log + */ + cnat_add_dest_n_log(db, &dest_info); + //goto packet_upd; + } else if(PREDICT_FALSE(db->nsessions == 1)) { + /* Destn is not same as in main db. 
Multiple session + * scenario + */ + dest_info.k.vrf = db->in2out_key.k.vrf; + session_db = cnat_handle_1to2_session(db, &dest_info); + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_TCP_O2I_D; + counter = CNAT_V4_TCP_O2I_SESSION_DROP; + goto drop_pkt; + } + } else { /* There are already multiple destinations */ + dest_info.k.vrf = db->in2out_key.k.vrf; + /* If session already exists, + * cnat_create_session_db_entry will return the existing db + * else create a new db + * If could not create, return NULL + */ + session_db = cnat_create_session_db_entry(&dest_info, db, TRUE); + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_TCP_O2I_D; + counter = CNAT_V4_TCP_O2I_SESSION_DROP; + goto drop_pkt; + } + } + /* useful for ALG only */ + #if 0 + if(PREDICT_TRUE(session_db)) { + stored_seq_no = session_db->tcp_seq_num; + stored_ack_no = session_db->ack_no; + window = session_db->window; + } + #endif + } + + +update_pkt: + + counter = CNAT_V4_TCP_O2I_T_PKT; + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + /* update ip checksum, newchecksum = ~(~oldchecksum + ~old + new) */ + cnat_v4_recalculate_tcp_checksum(ip, tcp, + &(ip->dest_addr), + &(tcp->dest_port), + db->in2out_key.k.ipv4, + db->in2out_key.k.port); + + /* CNAT_PPTP_ALG_SUPPORT */ + db->out2in_pkts++; + + nat44_dslite_global_stats[0].out2in_forwarding_count++;; + + V4_TCP_UPDATE_SESSION_FLAG(db, tcp); + + + if(PREDICT_FALSE(session_db != NULL)) { + V4_TCP_UPDATE_SESSION_DB_FLAG(session_db, tcp); + CNAT_DB_TIMEOUT_RST(session_db); + } else { + V4_TCP_UPDATE_SESSION_FLAG(db, tcp); + CNAT_DB_TIMEOUT_RST(db); + } + + } + +drop_pkt: + em->counters[node_counter_base_index + counter] += 1; + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_tcp_outside_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (cnat_ipv4_tcp_outside_input_node) = { + .function = cnat_ipv4_tcp_outside_input_node_fn, + .name = "vcgn-v4-tcp-o2i", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_tcp_outside_input_error_strings), + .error_strings = cnat_ipv4_tcp_outside_input_error_strings, + + .n_next_nodes = CNAT_V4_TCP_O2I_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + //[CNAT_V4_TCP_O2I_E] = "vcgn-v4-tcp-o2i-e", + [CNAT_V4_TCP_O2I_T] = "ip4-input", + [CNAT_V4_TCP_O2I_D] = "error-drop", + }, +}; + +clib_error_t *cnat_ipv4_tcp_outside_input_init (vlib_main_t *vm) +{ + cnat_ipv4_tcp_outside_input_main_t * mp = &cnat_ipv4_tcp_outside_input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_tcp_outside_input_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_udp.h b/vnet/vnet/vcgn/cnat_ipv4_udp.h new file mode 100644 index 00000000000..f6c5b5e0133 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_udp.h @@ -0,0 +1,41 @@ +/* + *----------------------------------------------------------------------------- + * + * Filename: cnat_ipv4_udp.h + * + * Description: common functions for udp node + * + * Assumptions and Constraints: + * + * Copyright (c) 2000-2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *----------------------------------------------------------------------------- + */ + +#ifndef __CNAT_IPV4_UDP_H__ +#define __CNAT_IPV4_UDP_H__ + +#include "tcp_header_definitions.h" +#include "cnat_db.h" +#include "cnat_v4_functions.h" +#include "cnat_global.h" +#include "cnat_config.h" + +inline void swap_ip_src_udp_port(ipv4_header *ip, + udp_hdr_type_t *udp, + cnat_main_db_entry_t *db); +inline void swap_ip_dst_udp_port(ipv4_header *ip, + udp_hdr_type_t *udp, + cnat_main_db_entry_t *db, + u16 vrf); +#endif /* __CNAT_IPV4_UDP_H__ */ diff --git a/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input.c b/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input.c new file mode 100644 index 00000000000..657c5f1e64e --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input.c @@ -0,0 +1,508 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_udp_inside_input.c - cnat_ipv4_udp_inside_input node functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_global.h" +#include "cnat_db.h" +#include "cnat_ipv4_udp.h" +#include "cnat_pcp_server.h" + + +#define foreach_cnat_ipv4_udp_inside_input_error \ +_(CNAT_V4_UDP_I2O_T_PKT, "v4 udp i2o transmit") \ +_(CNAT_V4_UDP_I2O_MISS_PKT, "v4 udp i2o db miss") \ +_(CNAT_V4_UDP_I2O_TTL_GEN, "v4 udp i2o TTL gen") \ +_(CNAT_V4_UDP_I2O_TTL_DROP, "v4 udp i2o TTL drop") \ +_(CNAT_V4_PCP_PKT, "v4 pcp pkt") \ +_(CNAT_V4_UDP_I2O_SESSION_DROP, "v4 udp i2o session drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_udp_inside_input_error +#undef _ + CNAT_IPV4_UDP_INSIDE_INPUT_N_ERROR, +} cnat_ipv4_udp_inside_input_t; + +static char * cnat_ipv4_udp_inside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_udp_inside_input_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_udp_inside_input_main_t; + +typedef enum { + CNAT_V4_I2O_FIXME, + CNAT_V4_UDP_I2O_E, + CNAT_REWRITE_OUTPUT, + CNAT_V4_UDP_I2O_T = CNAT_REWRITE_OUTPUT, + CNAT_N_NEXT, +} cnat_ipv4_udp_inside_input_next_t; + +#define CNAT_V4_UDP_I2O_D CNAT_V4_I2O_FIXME +#define CNAT_V4_PCP_T CNAT_V4_I2O_FIXME + +cnat_ipv4_udp_inside_input_main_t cnat_ipv4_udp_inside_input_main; +vlib_node_registration_t cnat_ipv4_udp_inside_input_node; + +#define NSTAGES 6 + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. + */ +#define stage0 generic_stage0 + +#ifndef TOBE_PORTED +static inline u32 +is_pcp_pkt(u32 addr, u16 port) +{ + return CNAT_NO_CONFIG; +} +#else +static inline u32 +is_pcp_pkt(spp_ctx_t *ctx, u32 addr, u16 port) +{ + cnat_vrfmap_t *my_vrfmap = NULL; + u16 my_vrfmap_index; + + my_vrfmap_index = vrf_map_array[ctx->ru.rx.uidb_index]; + + if (PREDICT_TRUE(my_vrfmap_index != VRF_MAP_ENTRY_EMPTY)) { + + my_vrfmap = cnat_map_by_vrf + my_vrfmap_index; + + if (PREDICT_FALSE( port == my_vrfmap->pcp_server_port)) { + if(PREDICT_TRUE(addr == my_vrfmap->pcp_server_addr)) { + return CNAT_SUCCESS; + } + } + } + + return CNAT_NO_CONFIG; +} +#endif + +inline void swap_ip_src_udp_port(ipv4_header *ip, + udp_hdr_type_t *udp, + cnat_main_db_entry_t *db) +{ + /* + * declare varibale + */ + CNAT_UPDATE_L3_L4_CHECKSUM_DECLARE + /* + * calculate checksum + */ + CNAT_UPDATE_L3_L4_CHECKSUM(((u16)(db->in2out_key.k.ipv4)), + ((u16)(db->in2out_key.k.ipv4 >> 16)), + (db->in2out_key.k.port), + (clib_net_to_host_u16(ip->checksum)), + (clib_net_to_host_u16(udp->udp_checksum)), + ((u16)(db->out2in_key.k.ipv4)), + ((u16)(db->out2in_key.k.ipv4 >> 16)), + (db->out2in_key.k.port)) + +/* #define UDP_PACKET_DEBUG 1 */ + +// Temporary debugs which will be suppressed later +#ifdef UDP_PACKET_DEBUG + if (PREDICT_FALSE(udp_inside_packet_dump_enable)) { + printf("\nIn2Out UDP packet before translation"); + print_udp_pkt(ip); + } +#endif + + //set ip header + ip->src_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + ip->checksum = + clib_host_to_net_u16(new_l3_c); + + u16 frag_offset = + clib_net_to_host_u16(ip->frag_flags_offset); + + if(PREDICT_FALSE(frag_offset & IP_FRAG_OFFSET_MASK)) { + return; /* No need to update UDP fields */ + } + //set udp header + udp->src_port = + 
clib_host_to_net_u16(db->out2in_key.k.port); + + /* + * No easy way to avoid this if check except by using + * complex logic - may not be worth it. + */ + if (PREDICT_TRUE(udp->udp_checksum)) { + udp->udp_checksum = + clib_host_to_net_u16(new_l4_c); + } + +// Temporary debugs which will be suppressed later +#ifdef UDP_PACKET_DEBUG + if (PREDICT_FALSE(udp_inside_checksum_disable)) { + printf("\nIn2Out UDP checksum 0x%x disabled by force", new_l4_c); + udp->udp_checksum = 0; + } + if (PREDICT_FALSE(udp_inside_packet_dump_enable)) { + printf("\nIn2Out UDP packet after translation"); + print_udp_pkt(ip); + } +#endif +} + +static inline void +stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + u64 a, b, c; + u32 bucket; + u8 *prefetch_target; + + vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index); + ipv4_header *ip = vlib_buffer_get_current (b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8*)ip + ipv4_hdr_len); + + u64 tmp = 0; + tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = + clib_net_to_host_u32(ip->src_addr); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16 (udp->src_port); + + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + CNAT_UDP) + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48; + + CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_in2out_hash[bucket]); + vnet_buffer(b0)->vcgn_uii.bucket = bucket; + + /* Prefetch the hash bucket */ + CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static inline void +stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ /* nothing */ } + +#define SPP_LOG2_CACHE_LINE_BYTES 6 +#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES) + +static inline void +stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + uword prefetch_target0, prefetch_target1; + u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket; + + /* read the hash bucket */ + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket + = cnat_in2out_hash[bucket].next; + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. + */ + prefetch_target0 = (uword)(cnat_main_db + db_index); + CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD); + /* Just beyond DB key #2 */ + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + /* If the targets are in different lines, do the second prefetch */ + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD); + } + } +} + +static inline void +stage4(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + cnat_main_db_entry_t *db; + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. 
After that, maybe + * not so much... + */ + do { + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->in2out_key.key64 == + vnet_buffer(b0)->vcgn_uii.key.key64)) { + break; + } + db_index = db->in2out_hash.next; + } while (db_index != EMPTY); + + /* Stick the answer back into the pipeline context structure */ + vnet_buffer(b0)->vcgn_uii.bucket = db_index; + } +} + +static u64 pkt_num = 0; +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + spp_ctx_t *ctx = (spp_ctx_t *) &vnet_buffer(b0)->vcgn_uii; + int disposition = CNAT_V4_UDP_I2O_T; + int counter = CNAT_V4_UDP_I2O_T_PKT; + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8*)ip + ipv4_hdr_len); + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_udp_inside_input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + cnat_session_entry_t *session_db = NULL; + cnat_key_t dest_info; + + pkt_num++; + + if(PREDICT_FALSE(is_pcp_pkt(ip->dest_addr, udp->dest_port) == + CNAT_SUCCESS)) + { + PCP_INCR(input); + disposition = CNAT_V4_PCP_T; + counter = CNAT_V4_PCP_PKT; + + goto pcp_pkt; + } + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + if (PREDICT_FALSE(ip->ttl <= 1)) { + /* Try to generate ICMP error msg, as TTL is <= 1 */ + + if (icmpv4_generate_with_throttling + (ctx, ip, ctx->ru.rx.uidb_index)) { + /* Generated ICMP */ + disposition = CNAT_REWRITE_OUTPUT; + counter = CNAT_V4_UDP_I2O_TTL_GEN; + } else { + /* Could not generated ICMP - drop the packet */ + disposition = CNAT_V4_UDP_I2O_D; + counter = CNAT_V4_UDP_I2O_TTL_DROP; + } + goto drop_pkt; + } + } + if (PREDICT_TRUE(db_index != EMPTY)) { + cnat_main_db_entry_t *db = cnat_main_db + db_index; + + dest_info.k.ipv4 = clib_net_to_host_u32(ip->dest_addr); + + /* MUST revisit: it seems farg is set to 1 for few packets & because of + * this the port is not updated & it becomes 0. Commenting teporarily + * this fargment check & setting dst port with udp dst port value */ + dest_info.k.port = clib_net_to_host_u16(udp->dest_port); + #if 0 // DONOT REMOVE THIS if 0 + if(PREDICT_FALSE(ctx->ru.rx.frag)) { +#ifdef TOBE_PORTED + /* Must have routed through cnat_v4_frag_in2out node */ + u16 *feature_data_ports = (u16 *)&ctx->feature_data[4]; + dest_info.k.port = *feature_data_ports; +#endif + } else { + dest_info.k.port = clib_net_to_host_u16(udp->dest_port); + } + #endif + + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) { + + /* No DBL support, so just update the destn and proceed */ + db->dst_ipv4 = dest_info.k.ipv4; + db->dst_port = dest_info.k.port; + CNAT_DB_TIMEOUT_RST(db); + goto update_pkt; + } + + if(PREDICT_TRUE((db->dst_ipv4 == dest_info.k.ipv4) && + (db->dst_port == dest_info.k.port))) { + + CNAT_DB_TIMEOUT_RST(db); + goto update_pkt; + } else { + if (PREDICT_FALSE(db->nsessions == 0)) { + /* Should be a static entry + * Note this session as the first session and log + */ + cnat_add_dest_n_log(db, &dest_info); + /* + * update db counter, timer + */ + + CNAT_DB_TIMEOUT_RST(db); + + } else if(PREDICT_TRUE(db->nsessions == 1)) { + /* Destn is not same as in main db. 
Multiple session + * scenario + */ + //printf(">>> [pkt# %lu] src_ip: 0x%x, db ip: 0x%x, db port: %u; dest ip: 0x%x, dest port: %u\n", + // pkt_num, ntohl(ip->src_addr), db->dst_ipv4, db->dst_port, dest_info.k.ipv4, dest_info.k.port); + + dest_info.k.vrf = db->in2out_key.k.vrf; + session_db = cnat_handle_1to2_session(db, &dest_info); + + if(PREDICT_TRUE(session_db != NULL)) { + /* session exists */ + CNAT_DB_TIMEOUT_RST(session_db); + } else { + /* could not create session db - drop packet */ + disposition = CNAT_V4_UDP_I2O_D; + counter = CNAT_V4_UDP_I2O_SESSION_DROP; + goto drop_pkt; + } + + } else { + /* More than 2 sessions exists */ + + dest_info.k.vrf = db->in2out_key.k.vrf; + + /* If session already exists, + * cnat_create_session_db_entry will return the existing db + * else create a new db + * If could not create, return NULL + */ + session_db = cnat_create_session_db_entry(&dest_info, + db, TRUE); + + if(PREDICT_FALSE(session_db != NULL)) { + /* session exists */ + CNAT_DB_TIMEOUT_RST(session_db); + } else { + /* could not create session db - drop packet */ + disposition = CNAT_V4_UDP_I2O_D; + counter = CNAT_V4_UDP_I2O_SESSION_DROP; + goto drop_pkt; + } + } + } + +update_pkt: + /* + * 1. update src ipv4 addr and src udp port + * 2. update ipv4 checksum and udp checksum + */ + swap_ip_src_udp_port(ip, udp, db); + /* + * update db counter, timer + */ + + db->in2out_pkts++; + + /* + * need to set outside vrf + * from db->out2in_key.k.vrf + */ + + /* Temporarily keeping this commented */ + //PLATFORM_CNAT_SET_TX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_TX], + // db->out2in_key.k.vrf) + + in2out_forwarding_count++; + + } else { + disposition = CNAT_V4_UDP_I2O_E; + counter = CNAT_V4_UDP_I2O_MISS_PKT; + } + +drop_pkt: +pcp_pkt: + + em->counters[node_counter_base_index + counter] += 1; + + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_udp_inside_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (cnat_ipv4_udp_inside_input_node) = { + .function = cnat_ipv4_udp_inside_input_node_fn, + .name = "vcgn-v4-udp-i2o", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_udp_inside_input_error_strings), + .error_strings = cnat_ipv4_udp_inside_input_error_strings, + + .n_next_nodes = CNAT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_V4_I2O_FIXME] = "error-drop", + // [CNAT_V4_UDP_I2O_T] = "ip4-input", + [CNAT_V4_UDP_I2O_E] = "vcgn-v4-udp-i2o-e", + [CNAT_REWRITE_OUTPUT] = "ip4-input", + }, +}; + +clib_error_t *cnat_ipv4_udp_inside_input_init (vlib_main_t *vm) +{ + cnat_ipv4_udp_inside_input_main_t * mp = &cnat_ipv4_udp_inside_input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_udp_inside_input_init); diff --git a/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input_exceptions.c b/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input_exceptions.c new file mode 100644 index 00000000000..f078c8d4391 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input_exceptions.c @@ -0,0 +1,283 @@ +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_udp_inside_input_exception_stages.c - cnat_ipv4_udp_inside_input_exception node pipeline stage functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_global.h" +#include "cnat_db.h" +#include "cnat_ipv4_udp.h" + +/* + * Dump these counters via the "show error" CLI command + */ + +#define foreach_cnat_ipv4_udp_inside_input_exc_error \ +_(CNAT_V4_UDP_I2O_T_PKT, "v4 udp i2o transmit") \ +_(CNAT_V4_UDP_I2O_D_PKT, "v4 udp i2o drop") \ +_(CNAT_V4_ICMP_G_I2O_T_PKT, "v4 udp i2o icmp msg gen") \ +_(CNAT_V4_UDP_I2O_DC_PKT, "v4 udp i2o (no config) drop") \ +_(CNAT_V4_UDP_I2O_DR_PKT, "v4 udp i2o (not in run state) drop") \ +_(CNAT_V4_UDP_I2O_DD_PKT, "v4 udp i2o (no direct port) drop") \ +_(CNAT_V4_UDP_I2O_DA_PKT, "v4 udp i2o (no any port) drop") \ +_(CNAT_V4_UDP_I2O_DO_PKT, "v4 udp i2o (out of port limit) drop") \ +_(CNAT_V4_UDP_I2O_DI_PKT, "v4 udp i2o (invalid packet) drop") \ +_(CNAT_V4_UDP_I2O_DS_PKT, "v4 udp i2o (no sessoon db) drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_udp_inside_input_exc_error +#undef _ + CNAT_IPV4_UDP_INSIDE_INPUT_EXCEPTIONS_N_ERROR, +} cnat_ipv4_udp_inside_input_exc_error_t; + + +static char * cnat_ipv4_udp_inside_input_exc_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_udp_inside_input_exc_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_udp_inside_input_exc_main_t; + +typedef enum { + CNAT_V4_UDP_I2O_T, + CNAT_V4_UDP_I2O_D, + CNAT_V4_ICMP_G_I2O_T = CNAT_V4_UDP_I2O_D, /* TOBE_PORTED */ + CNAT_V4_UDP_INSIDE_INPUT_EXC_N_NEXT, +} cnat_ipv4_udp_inside_input_exc_next_t; + +cnat_ipv4_udp_inside_input_exc_main_t cnat_ipv4_udp_inside_input_exc_main; +vlib_node_registration_t cnat_ipv4_udp_inside_input_exc_node; + +#define NSTAGES 2 + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. 
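+ *
+ * As in the other exception nodes, NSTAGES is 2: stage0 only
+ * prefetches, while last_stage() below allocates or finds the main db
+ * entry, rewrites the packet and counts the result.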
+ */ +#define stage0 generic_stage0 + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + vlib_node_t *n = + vlib_get_node (vm, cnat_ipv4_udp_inside_input_exc_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + cnat_gen_icmp_info info; + cnat_db_key_bucket_t ki; + spp_ctx_t *ctx __attribute__((unused)) + = (spp_ctx_t *) &vnet_buffer(b0)->vcgn_uii; + cnat_main_db_entry_t *db = NULL; + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8*)ip + ipv4_hdr_len); + int disposition = CNAT_V4_UDP_I2O_T; + int counter = CNAT_V4_UDP_I2O_T_PKT; + + cnat_key_t dest_info; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + CNAT_UDP) + + vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = clib_net_to_host_u32(ip->src_addr); + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + ki.k.k.vrf, CNAT_UDP) + + ki.k.k.ipv4 = clib_net_to_host_u32(ip->src_addr); + + + /* MUST REVISIT: commentting frag check. Unconditional destination port + * update. DONOT remove this #if 0 */ + ki.k.k.port = + clib_net_to_host_u16(udp->src_port); + dest_info.k.port = + clib_net_to_host_u16(udp->dest_port); +#if 0 + if(PREDICT_FALSE(ctx->ru.rx.frag)) { +#ifdef TOBE_PORTED + /* Must have routed through cnat_v4_frag_in2out node */ + u16 *feature_data_ports = (u16 *)&ctx->feature_data[2]; + ki.k.k.port = *feature_data_ports; + feature_data_ports++; + dest_info.k.port = *feature_data_ports; +#endif + } else { + ki.k.k.port = + clib_net_to_host_u16(udp->src_port); + dest_info.k.port = + clib_net_to_host_u16(udp->dest_port); + } +#endif /* if 0 */ + + dest_info.k.ipv4 = clib_net_to_host_u32(ip->dest_addr); + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + dest_info.k.vrf, CNAT_UDP) + + if (PREDICT_TRUE(ki.k.k.port)) { + if (ki.k.k.port & 0x1) { + db = cnat_get_main_db_entry_v2(&ki, PORT_S_ODD, PORT_TYPE_DYNAMIC, + &info, &dest_info); + } else { + db = cnat_get_main_db_entry_v2(&ki, PORT_S_EVEN, PORT_TYPE_DYNAMIC, + &info, &dest_info); + } + } else { + /* + * No UDP port value of 0 - drop it + */ + db = NULL; + info.error = CNAT_ERR_BAD_TCP_UDP_PORT; + } + + if (PREDICT_TRUE((u64)db)) { + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + /* + * step 6 do nat before fwd pkt + */ + swap_ip_src_udp_port(ip, udp, db); + /* + * update db for this pkt + */ + CNAT_DB_UPDATE_IN2OUT_TIMER + + /* Check timeout db if there is config for this */ + (void) query_and_update_db_timeout((void *)db, MAIN_DB_TYPE); + +/* Temporarily keeping it commented */ + //PLATFORM_CNAT_SET_TX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_TX], + // db->out2in_key.k.vrf) + in2out_forwarding_count++; + + } else { + switch (info.error) { + case (CNAT_NO_VRF_RUN): + em->counters[node_counter_base_index + CNAT_V4_UDP_I2O_DR_PKT] += 1; + break; + case (CNAT_OUT_LIMIT): + em->counters[node_counter_base_index + CNAT_V4_UDP_I2O_DO_PKT] += 1; + break; + case (CNAT_NO_PORT_ANY): + case (CNAT_NO_POOL_ANY): + case (CNAT_BAD_INUSE_ANY): + case (CNAT_NOT_FOUND_ANY): + em->counters[node_counter_base_index + CNAT_V4_UDP_I2O_DA_PKT] += 1; + break; + case (CNAT_INV_PORT_DIRECT): + case (CNAT_DEL_PORT_DIRECT): + case (CNAT_BAD_INUSE_DIRECT): + case (CNAT_NOT_FOUND_DIRECT): + 
em->counters[node_counter_base_index + CNAT_V4_UDP_I2O_DD_PKT] += 1; + break; + case (CNAT_ERR_BAD_TCP_UDP_PORT): + em->counters[node_counter_base_index + CNAT_V4_UDP_I2O_DI_PKT] += 1; + break; + case (CNAT_ERR_NO_SESSION_DB): + em->counters[node_counter_base_index + CNAT_V4_UDP_I2O_DS_PKT] += 1; + break; + default: + em->counters[node_counter_base_index + CNAT_V4_UDP_I2O_DC_PKT] += 1; + break; + } + /* + * send to icmp msg generate node + */ + if (info.gen_icmp_msg == CNAT_ICMP_MSG) { +#ifdef TOBE_PORTED + u32 *fd = (u32*)ctx->feature_data; + fd[0] = info.svi_addr; + fd[1] = CNAT_ICMP_DEST_UNREACHABLE; +#endif + disposition = CNAT_V4_ICMP_G_I2O_T; + counter = CNAT_V4_ICMP_G_I2O_T_PKT; + } else { + disposition = CNAT_V4_UDP_I2O_D; + counter = CNAT_V4_UDP_I2O_D_PKT; + } + DEBUG_I2O_DROP(CNAT_DEBUG_DROP_UDP) + } + + em->counters[node_counter_base_index + counter] += 1; + + return disposition; +} + + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_udp_inside_input_exc_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + +VLIB_REGISTER_NODE (cnat_ipv4_udp_inside_input_exc_node) = { + .function = cnat_ipv4_udp_inside_input_exc_node_fn, + .name = "vcgn-v4-udp-i2o-e", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_udp_inside_input_exc_error_strings), + .error_strings = cnat_ipv4_udp_inside_input_exc_error_strings, + + .n_next_nodes = CNAT_V4_UDP_INSIDE_INPUT_EXC_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [CNAT_V4_UDP_I2O_T] = "ip4-input", + [CNAT_V4_UDP_I2O_D] = "error-drop", + }, +}; + + +clib_error_t *cnat_ipv4_udp_inside_input_exc_init (vlib_main_t *vm) +{ + cnat_ipv4_udp_inside_input_exc_main_t * mp = &cnat_ipv4_udp_inside_input_exc_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_udp_inside_input_exc_init); + diff --git a/vnet/vnet/vcgn/cnat_ipv4_udp_outside_input.c b/vnet/vnet/vcgn/cnat_ipv4_udp_outside_input.c new file mode 100644 index 00000000000..203568c8e0d --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ipv4_udp_outside_input.c @@ -0,0 +1,605 @@ + +/* + *--------------------------------------------------------------------------- + * cnat_ipv4_udp_outside_input_stages.c - cnat_ipv4_udp_outside_input node pipeline stage functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_ipv4_udp.h" +#include "dslite_db.h" +#include "cnat_db.h" +#include "cnat_v4_functions.h" + +//#include <dslite_v6_functions.h> +//#include <pool.h> +//#include "cnat_va_db.h" + +#define foreach_cnat_ipv4_udp_outside_input_error \ +_(CNAT_V4_UDP_O2I_T_PKT, "v4 udp o2i transmit") \ +_(CNAT_V4_DSLITE_ENCAP_CTR, "to dslite encap") \ +_(CNAT_V4_UDP_O2I_MISS_PKT, "v4 udp o2i db miss drop") \ +_(CNAT_V4_UDP_O2I_TTL_GEN, "v4 udp o2i TTL gen") \ +_(CNAT_V4_UDP_O2I_TTL_DROP, "v4 udp o2i TTL drop") \ +_(CNAT_V4_UDP_O2I_PTB_GEN, "v4 ptb gen") \ +_(CNAT_V4_UDP_O2I_PTB_DROP, "v4 ptb throttle drop") \ +_(CNAT_V4_UDP_O2I_SESSION_DROP, "v4 udp o2i session drop") \ +_(CNAT_V4_UDP_O2I_FILTER_DROP, "v4 udp o2i drop: end point filtering") \ +_(CNAT_V4_UDP_O2I_SUB_FRAG_NO_DB_DROP, "v4 udp o2i subsequent frag no DB drop") \ +_(CNAT_V4_UDP_O2I_1ST_FRAG_FILTER_DROP, "v4 udp i2o 1st frag filter drop") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_udp_outside_input_error +#undef _ + CNAT_IPV4_UDP_OUTSIDE_INPUT_N_ERROR, +} cnat_ipv4_udp_outside_input_t; + +static char * cnat_ipv4_udp_outside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_udp_outside_input_error +#undef _ +}; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ipv4_udp_outside_input_main_t; + +typedef enum { + //CNAT_V4_O2I_FIXME, + CNAT_V4_UDP_O2I_E, + CNAT_V4_UDP_O2I_T, + CNAT_V4_UDP_O2I_NEXT, +} cnat_ipv4_udp_outside_input_next_t; + +//#define CNAT_V4_DSLITE_ENCAP CNAT_V4_O2I_FIXME +//#define CNAT_V4_UDP_O2I_E CNAT_V4_O2I_FIXME + +cnat_ipv4_udp_outside_input_main_t cnat_ipv4_udp_outside_input_main; +vlib_node_registration_t cnat_ipv4_udp_outside_input_node; + +#define NSTAGES 6 + +/* + * Use the generic buffer metadata + first line of packet data prefetch + * stage function from <api/pipeline.h>. This is usually a Good Idea. + */ +#define stage0 generic_stage0 + + +#if 0 +typedef struct cnat_ipv4_udp_outside_input_pipeline_data_ { + //spp_node_main_vector_t *nmv; + dslite_common_pipeline_data_t common_data; + /* Add additional pipeline stage data here... 
*/ + u32 bucket; +#ifdef DSLITE_DEF + u32 user_bucket; + dslite_v4_to_v6_udp_counter_t *udp_counter; + dslite_icmp_gen_counter_t *icmp_gen_counter; + +#endif + cnat_key_t ki; + udp_hdr_type_t *udp; + u8 frag_pkt; +} cnat_ipv4_udp_outside_input_pipeline_data_t; + +#endif + +#define CNAT_UDP_OUTSIDE_UPDATE_FLAG_TIMER(db,dslite_nat44_inst_id) \ + if (PREDICT_FALSE(!(db->flags & CNAT_DB_FLAG_UDP_ACTIVE))) { \ + db->flags |= CNAT_DB_FLAG_UDP_ACTIVE; \ + CNAT_DB_TIMEOUT_RST(db); \ + } else if (PREDICT_FALSE(db->flags & CNAT_DB_DSLITE_FLAG)) { \ + if (PREDICT_TRUE(dslite_table_db_ptr[dslite_nat44_inst_id].mapping_refresh_both_direction)) { \ + CNAT_DB_TIMEOUT_RST(db); \ + } \ + } else if (PREDICT_TRUE(mapping_refresh_both_direction)) { \ + CNAT_DB_TIMEOUT_RST(db); \ + } \ + +#if 0 +static cnat_ipv4_udp_outside_input_pipeline_data_t pctx_data[SPP_MAXDISPATCH]; +#define EXTRA_PIPELINE_ARGS_PROTO , cnat_ipv4_udp_outside_input_pipeline_data_t *pctx +#define EXTRA_PIPELINE_ARGS , pctx + +#endif + +/*inline u32 +is_static_dest_nat_enabled(u16 vrf) +{ + if(static_dest_vrf_map_array[vrf] == 1) { + return CNAT_SUCCESS; + } + return CNAT_NO_CONFIG; +}*/ + +inline void swap_ip_dst(ipv4_header *ip, + cnat_main_db_entry_t *db, u16 vrf) +{ + + CNAT_UPDATE_L3_CHECKSUM_DECLARE + /* + * calculate checksum + */ + CNAT_UPDATE_L3_CHECKSUM(((u16)(db->out2in_key.k.ipv4)), + ((u16)(db->out2in_key.k.ipv4 >> 16)), + (clib_host_to_net_u16(ip->checksum)), + ((u16)(db->in2out_key.k.ipv4)), + ((u16)(db->in2out_key.k.ipv4 >> 16))) + //set ip header + ip->dest_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + ip->checksum = + clib_host_to_net_u16(new_l3_c); + +#if 0 + + if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) { + direction = 1; + if(cnat_static_dest_db_get_translation(ip->src_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) { + old_ip = spp_net_to_host_byte_order_32(&(ip->src_addr)); + old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip); + + CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (spp_net_to_host_byte_order_16(&(ip->checksum))), + ((u16)(old_postmap_ip & 0xFFFF)), + ((u16)(old_postmap_ip >> 16))) + ip->checksum = + clib_host_to_net_u16(new_l3_c); + ip->src_addr = postmap_ip; + } + } +#endif +} + +inline void swap_ip_dst_udp_port(ipv4_header *ip, + udp_hdr_type_t *udp, + cnat_main_db_entry_t *db, u16 vrf) +{ + +#define UDP_PACKET_DEBUG 1 + +// Temporary debugs which will be suppressed later +#ifdef UDP_PACKET_DEBUG + if (PREDICT_FALSE(udp_outside_packet_dump_enable)) { + printf("\nOut2In UDP packet before translation"); + print_udp_pkt(ip); + } +#endif + +#if 0 + if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) { + direction = 1; + if(cnat_static_dest_db_get_translation(ip->src_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) { + + CNAT_UPDATE_L3_L4_CHECKSUM_DECLARE + + old_ip = spp_net_to_host_byte_order_32(&(ip->src_addr)); + old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip); + + CNAT_UPDATE_L3_L4_CHECKSUM(((u16)(old_ip & 0xFFFF)), + ((u16)(old_ip >> 16)), + (spp_net_to_host_byte_order_16(&(udp->src_port))), + (spp_net_to_host_byte_order_16(&(ip->checksum))), + (spp_net_to_host_byte_order_16(&(udp->udp_checksum))), + ((u16)(old_postmap_ip & 0xFFFF)), + ((u16)(old_postmap_ip >> 16)), + (spp_net_to_host_byte_order_16(&(udp->src_port)))) + + ip->checksum = + clib_host_to_net_u16(new_l3_c); + ip->src_addr = postmap_ip; + if (PREDICT_TRUE(udp->udp_checksum)) { + udp->udp_checksum = clib_host_to_net_u16(new_l4_c); + } + } + } +#endif + /* + * declare 
variable + */ + CNAT_UPDATE_L3_L4_CHECKSUM_DECLARE + /* + * calculate checksum + */ + CNAT_UPDATE_L3_L4_CHECKSUM(((u16)(db->out2in_key.k.ipv4)), + ((u16)(db->out2in_key.k.ipv4 >> 16)), + (db->out2in_key.k.port), + (clib_net_to_host_u16(ip->checksum)), + (clib_net_to_host_u16(udp->udp_checksum)), + ((u16)(db->in2out_key.k.ipv4)), + ((u16)(db->in2out_key.k.ipv4 >> 16)), + (db->in2out_key.k.port)) + + + + + //set ip header + ip->dest_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + ip->checksum = + clib_host_to_net_u16(new_l3_c); + + //set udp header + udp->dest_port = + clib_host_to_net_u16(db->in2out_key.k.port); + + /* + * No easy way to avoid this if check except by using + * complex logic - may not be worth it. + */ + if (PREDICT_TRUE(udp->udp_checksum)) { + udp->udp_checksum = clib_host_to_net_u16(new_l4_c); + } + + + +// Temporary debugs which will be suppressed later +#ifdef UDP_PACKET_DEBUG + if (PREDICT_FALSE(udp_outside_checksum_disable)) { + printf("\nOut2In UDP checksum 0x%x disabled by force", new_l4_c); + udp->udp_checksum = 0; + } + if (PREDICT_FALSE(udp_outside_packet_dump_enable)) { + printf("\nOut2In UDP packet after translation"); + print_udp_pkt(ip); + } +#endif +} + +static inline void +stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + u64 a, b, c; + u32 bucket; + u8 *prefetch_target; + + vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index); + ipv4_header *ip = vlib_buffer_get_current (b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8*)ip + ipv4_hdr_len); + + u64 tmp = 0; + tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 = + clib_net_to_host_u32(ip->dest_addr); + vnet_buffer(b0)->vcgn_uii.key.k.port = + clib_net_to_host_u16 (udp->dest_port); + + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32; + + PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX], + vnet_buffer(b0)->vcgn_uii.key.k.vrf, + CNAT_UDP) + tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48; + + CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_out2in_hash[bucket]); + vnet_buffer(b0)->vcgn_uii.bucket = bucket; + + /* Prefetch the hash bucket */ + CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static inline void +stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ /* nothing */ } + +#define SPP_LOG2_CACHE_LINE_BYTES 6 +#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES) + +static inline void +stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + uword prefetch_target0, prefetch_target1; + u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket; + + /* read the hash bucket */ + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket + = cnat_out2in_hash[bucket].next; + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. 
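+ *
+ * prefetch_target0 covers the start of the main db entry;
+ * prefetch_target1 is issued only when the user_ports member lands on
+ * a different cache line, so at most two cache lines are touched here.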
+ */ + prefetch_target0 = (uword)(cnat_main_db + db_index); + CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, STORE); + /* Just beyond DB key #2 */ + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + /* If the targets are in different lines, do the second prefetch */ + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, STORE); + } + } +} + +static inline void +stage4(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + cnat_main_db_entry_t *db; + vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. After that, maybe + * not so much... + */ + do { + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->out2in_key.key64 == + vnet_buffer(b0)->vcgn_uii.key.key64)) { + break; + } + db_index = db->out2in_hash.next; + } while (db_index != EMPTY); + + /* Stick the answer back into the pipeline context structure */ + vnet_buffer(b0)->vcgn_uii.bucket = db_index; + } +} + +#if 0 + +ALWAYS_INLINE( +static inline void +stage5(spp_ctx_t **ctxs, int index, spp_node_t *np, + u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO)) +{ + spp_ctx_t *ctx = ctxs[index]; + u32 db_index = pctx[index].bucket; + /* for nat44, dslite_id will be 1 */ + u16 dslite_id = *(pctx[index].common_data.dslite_id_ptr); + + DSLITE_PREFETCH_COUNTER(pctx[index].udp_counter, + &dslite_all_counters[dslite_id].v46_udp_counters, + dslite_v4_to_v6_udp_counter_t, + v4_to_v6_udp_output_count, + "V4_TO_V6_UDP") + + DSLITE_PREFETCH_COUNTER(pctx[index].icmp_gen_counter, + &dslite_all_counters[dslite_id].dslite_icmp_gen_counters, + dslite_icmp_gen_counter_t, + v6_icmp_gen_count, + "V4_TO_V6_icmp") + +if (PREDICT_TRUE(db_index != EMPTY)) { + cnat_main_db_entry_t *db = cnat_main_db + db_index; + + u32 user_db_index = db->user_index; + DSLITE_PRINTF(1, "UDP o2i, db entry found %u %u %u\n", + db_index, user_db_index, + db->dslite_nat44_inst_id); + uword prefetch_target0 = (uword)(cnat_user_db + user_db_index); + SPP_PREFETCH(prefetch_target0, 0, LOAD); + pctx[index].user_bucket = user_db_index; + DSLITE_PRINTF(1, "UDP: Done with prefetch..\n"); +} else { + DSLITE_PRINTF(1, "UDP: Stage 5, db_index empty...\n"); +} +} + +#endif + + +static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi) +{ + + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi); + u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket; + //spp_ctx_t *ctx = (spp_ctx_t *) &vnet_buffer(b0)->vcgn_uii; + int disposition = CNAT_V4_UDP_O2I_T; + int counter = CNAT_V4_UDP_O2I_T_PKT; + ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0); + u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; + udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8*)ip + ipv4_hdr_len); + vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_udp_outside_input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + cnat_session_entry_t *session_db = NULL; + cnat_main_db_entry_t *db = NULL; + cnat_key_t dest_info; + u16 dslite_nat44_inst_id __attribute__((unused)) = 0; + + dest_info.k.port = 
clib_net_to_host_u16(udp->src_port); + dest_info.k.ipv4 = clib_net_to_host_u32(ip->src_addr); + + if (PREDICT_TRUE(db_index != EMPTY)) { + /* TTL gen was disabled for nat44 earlier + * But since dslite has got integrated in this + * TTL gen is enabled + */ + + db = cnat_main_db + db_index; + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) { + + /* No DBL support, so just update the destn and proceed */ + db->dst_ipv4 = dest_info.k.ipv4; + db->dst_port = dest_info.k.port; + CNAT_UDP_OUTSIDE_UPDATE_FLAG_TIMER(db, 0) + goto update_pkt; + } + + + if(PREDICT_TRUE((db->dst_ipv4 == dest_info.k.ipv4) && + (db->dst_port == dest_info.k.port))) { + + CNAT_UDP_OUTSIDE_UPDATE_FLAG_TIMER(db, 0) + goto update_pkt; + } else { + /* The session entries belonging to this entry are checked to find + * if an entry exist whose destination IP and port match with the + * source IP and port of the packet being processed + */ + dest_info.k.vrf = db->in2out_key.k.vrf; + + if (PREDICT_FALSE(db->nsessions == 0)) { + /* Should be a static entry + * Note this session as the first session and log + */ + cnat_add_dest_n_log(db, &dest_info); + CNAT_UDP_OUTSIDE_UPDATE_FLAG_TIMER(db, 0) + + } else if(PREDICT_TRUE(db->nsessions == 1)) { + + /* Destn is not same as in main db. Multiple session + * scenario + */ + dest_info.k.vrf = db->in2out_key.k.vrf; + session_db = cnat_handle_1to2_session(db, &dest_info); + + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_V4_UDP_O2I_E; + counter = CNAT_V4_UDP_O2I_SESSION_DROP; + goto drop_pkt; + } + + /* update session_db(cur packet) timer */ + CNAT_UDP_OUTSIDE_UPDATE_FLAG_TIMER(session_db, 0) + } else { + /* More 2 sessions exists */ + + dest_info.k.vrf = db->in2out_key.k.vrf; + + /* If session already exists, + * cnat_create_session_db_entry will return the existing db + * else create a new db + * If could not create, return NULL + */ + session_db = cnat_create_session_db_entry(&dest_info, + db, TRUE); + + if(PREDICT_FALSE(session_db != NULL)) { + /* session exists */ + CNAT_UDP_OUTSIDE_UPDATE_FLAG_TIMER(session_db, 0) + } else { + /* could not create session db - drop packet */ + disposition = CNAT_V4_UDP_O2I_E; + counter = CNAT_V4_UDP_O2I_SESSION_DROP; + goto drop_pkt; + } + } + } + +update_pkt: + + /* + * 1. update dest ipv4 addr and dest udp port + * 2. 
update ipv4 checksum and udp checksum + */ + //swap_ip_dst(ip, db, db->in2out_key.k.vrf); + swap_ip_dst_udp_port(ip, udp, db, db->in2out_key.k.vrf); + //DSLITE_PRINTF(1, "Done with swap_ip_dst_udp_port..\n"); + + db->out2in_pkts++; + + nat44_dslite_global_stats[0].out2in_forwarding_count++; + + /* #### Temporarily COMMENTED FOR IP ROUTE LOOKUP ISSUE #### */ + + //PLATFORM_CNAT_SET_TX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_TX], + // db->in2out_key.k.vrf) + } else { + disposition = CNAT_V4_UDP_O2I_E; + counter = CNAT_V4_UDP_O2I_MISS_PKT; + /* for NAT44 dslite_id would be 1 */ + nat44_dslite_common_stats[0].no_translation_entry_drops ++; + } + +drop_pkt: + + em->counters[node_counter_base_index + counter] += 1; + return disposition; +} + +#include <vnet/pipeline.h> + +static uword cnat_ipv4_udp_outside_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + + +VLIB_REGISTER_NODE (cnat_ipv4_udp_outside_input_node) = { + .function = cnat_ipv4_udp_outside_input_node_fn, + .name = "vcgn-v4-udp-o2i", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(cnat_ipv4_udp_outside_input_error_strings), + .error_strings = cnat_ipv4_udp_outside_input_error_strings, + + .n_next_nodes = CNAT_V4_UDP_O2I_NEXT, + + /* edit / add dispositions here */ +#if 0 + .next_nodes = { + //[CNAT_V4_O2I_FIXME] = "error-drop", + //[CNAT_V4_UDP_O2I_E] = "vcgn-v4-udp-o2i-e", + [CNAT_V4_UDP_O2I_E] = "vcgn-v4-udp-o2i-e", + [CNAT_V4_UDP_O2I_T] = "ip4-input", + }, +#endif + .next_nodes = { + [CNAT_V4_UDP_O2I_E] = "error-drop", + [CNAT_V4_UDP_O2I_T] = "ip4-input", + }, + +}; + +clib_error_t *cnat_ipv4_udp_outside_input_init (vlib_main_t *vm) +{ + cnat_ipv4_udp_outside_input_main_t * mp = &cnat_ipv4_udp_outside_input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_ipv4_udp_outside_input_init); diff --git a/vnet/vnet/vcgn/cnat_log_api.h b/vnet/vnet/vcgn/cnat_log_api.h new file mode 100644 index 00000000000..60cf683697d --- /dev/null +++ b/vnet/vnet/vcgn/cnat_log_api.h @@ -0,0 +1,114 @@ +/* + *------------------------------------------------------------------ + * cnat_log_api.h + * Declraes the common APIs for logging (both syslog and NFV9) + * Copyright (c) 2013, 20122 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_LOG_API_H__ +#define __CNAT_LOG_API_H__ + +#include "cnat_logging.h" + +static inline void cnat_log_ds_lite_mapping_delete(cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + return; +} + +static inline void cnat_log_ds_lite_mapping_create(cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + return; +} + +static inline void cnat_log_ds_lite_port_limit_exceeded( + dslite_key_t * key, + dslite_table_entry_t *dslite_entry_ptr) +{ + return; + +} + +static inline void cnat_log_nat44_port_limit_exceeded( + cnat_key_t * key, + cnat_vrfmap_t *vrfmap) +{ + return; +} +static inline void cnat_log_nat44_mapping_create(cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + return; +} + +static inline void cnat_log_nat44_mapping_delete(cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + return; +} + +/* Session Logging API for nat44 */ +static inline void cnat_session_log_nat44_mapping_create ( + cnat_main_db_entry_t *db, + cnat_session_entry_t *sdb, + cnat_vrfmap_t *vrfmap ) +{ + return; +} + +static inline void cnat_session_log_nat44_mapping_delete ( + cnat_main_db_entry_t *db, + cnat_session_entry_t *sdb, + cnat_vrfmap_t *vrfmap ) +{ + return; +} + +/* Session Logging API for dslite */ +static inline void cnat_session_log_ds_lite_mapping_create ( + cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, + cnat_session_entry_t *sdb ) +{ + return; +} + +static inline void cnat_session_log_ds_lite_mapping_delete ( + cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, + cnat_session_entry_t *sdb ) +{ + return; +} + +#endif /* #ifndef __CNAT_LOG_API_H__ */ + diff --git a/vnet/vnet/vcgn/cnat_log_common.h b/vnet/vnet/vcgn/cnat_log_common.h new file mode 100644 index 00000000000..52731bc0028 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_log_common.h @@ -0,0 +1,79 @@ +/* + *------------------------------------------------------------------ + * cnat_log_common.h + * Contains macros and definitions that are common to both syslog and nfv9 + * Copyright (c) 2011-2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_LOG_COMMON_H__ +#define __CNAT_LOG_COMMON_H__ + +#include <stdio.h> +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#include "cnat_db.h" +#include "nat64_db.h" +#include "spp_timers.h" +#include "spp_ctx.h" + +/* + * This corresponds to the length of the IMETRO SHIM Header for RODDICK + * For non-roddick cases, introduce an Ethernet header as well + */ +#if 0 + #if defined(TARGET_RODDICK) + #define CNAT_NFV9_SHIM_HDR_OFFSET 8 + #define CNAT_NFV9_L2_ENCAPS_OFFSET 0 + #else + #define CNAT_NFV9_SHIM_HDR_OFFSET 0 + #define CNAT_NFV9_L2_ENCAPS_OFFSET 16 + #endif +#endif + + #define CNAT_NFV9_IP_HDR_OFFSET 0 + + #define CNAT_NFV9_UDP_HDR_OFFSET \ + (CNAT_NFV9_IP_HDR_OFFSET + sizeof(ipv4_header)) + + #define CNAT_NFV9_HDR_OFFSET \ + (CNAT_NFV9_UDP_HDR_OFFSET + sizeof(udp_hdr_type_t)) + +u32 cnat_get_sys_up_time_in_ms(void); +u32 cnat_get_unix_time_in_seconds(void); +void cnat_dump_time_change_logs(void); +void cnat_handle_sys_time_change (time_t current_unix_time); +/* + * Maximum number of time log changes we maintain + */ + +#define MAX_TIME_CHANGE_LOGS (8) + +typedef struct { + /* + * A timer structure to periodically send NFv9 & syslog logging packets + * that have been waiting to be full for a long time. This will + * ensure add/delete events don't get delayed too much before they + * are sent to the collector. + */ + spp_timer_t log_timer; + + /* + * Whether we have initialized the NFv9 information + */ + u8 cnat_log_init_done; +} cnat_log_global_info_t; + +#endif /* __CNAT_LOG_COMMON_H__ */ diff --git a/vnet/vnet/vcgn/cnat_logging.c b/vnet/vnet/vcgn/cnat_logging.c new file mode 100644 index 00000000000..eace942c657 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_logging.c @@ -0,0 +1,3566 @@ +/* + *------------------------------------------------------------------ + * cnat_logging.c + * + * Copyright (c) 2009-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip4.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/format.h> +#include <vnet/ip/udp.h> + + +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_v4_functions.h" +#include "tcp_header_definitions.h" +#include "cnat_v4_ftp_alg.h" +#include "cnat_logging.h" +#include "platform_common.h" + +#define CNAT_NFV9_DEBUG_CODE 2 +#if CNAT_NFV9_DEBUG_CODE > 3 + +#define NFV9_COND if ((my_instance_number != 0) && (my_instance_number != 15)) + +#define NFV9_DEBUG_PRINTF1(a) NFV9_COND printf(a); +#define NFV9_DEBUG_PRINTF2(a, b) NFV9_COND printf(a, b); +#define NFV9_DEBUG_PRINTF3(a, b, c) NFV9_COND printf(a, b, c); +#define NFV9_DEBUG_PRINTF4(a, b, c, d) NFV9_COND printf(a, b, c, d); + +#else + +#define NFV9_DEBUG_PRINTF1(a) +#define NFV9_DEBUG_PRINTF2(a, b) +#define NFV9_DEBUG_PRINTF3(a, b, c) +#define NFV9_DEBUG_PRINTF4(a, b, c, d) + +#endif + +static void cnat_nfv9_insert_ingress_vrfid_name_record(cnat_nfv9_logging_info_t *nfv9_logging_info, u16 index); +void cnat_nfv9_ingress_vrfid_name_mapping_create( + cnat_nfv9_logging_info_t *nfv9_logging_info); + + +cnat_nfv9_global_info_t cnat_nfv9_global_info; + +cnat_nfv9_template_t cnat_nfv9_template_info; + +#define CNAT_NFV9_OPTION_TEMPLATE cnat_nfv9_template_info.cnat_nfv9_option_template + +u16 cnat_template_id[MAX_RECORDS] = + {0, CNAT_NFV9_ADD_TEMPLATE_ID, CNAT_NFV9_DEL_TEMPLATE_ID, + CNAT_NFV9_NAT64_ADD_BIB_TEMPLATE_ID,CNAT_NFV9_NAT64_DEL_BIB_TEMPLATE_ID, + CNAT_NFV9_NAT64_ADD_SESSION_TEMPLATE_ID, + CNAT_NFV9_NAT64_DEL_SESSION_TEMPLATE_ID, + CNAT_NFV9_DS_LITE_ADD_TEMPLATE_ID, + CNAT_NFV9_DS_LITE_DEL_TEMPLATE_ID +#ifndef NO_BULK_LOGGING + , CNAT_NFV9_NAT44_BULK_ADD_TEMPLATE_ID, + CNAT_NFV9_NAT44_BULK_DEL_TEMPLATE_ID, + CNAT_NFV9_DS_LITE_BULK_ADD_TEMPLATE_ID, + CNAT_NFV9_DS_LITE_BULK_DEL_TEMPLATE_ID +#endif /* #ifndef NO_BULK_LOGGING */ + , CNAT_NFV9_INGRESS_VRF_ID_NAME_TEMPLATE_ID, + CNAT_NFV9_NAT44_ADD_SESSION_TEMPLATE_ID, + CNAT_NFV9_NAT44_DEL_SESSION_TEMPLATE_ID, + CNAT_NFV9_DS_LITE_ADD_SESSION_TEMPLATE_ID, + CNAT_NFV9_DS_LITE_DEL_SESSION_TEMPLATE_ID + }; + +/* + * Logging information structures + */ +cnat_nfv9_logging_info_t cnat_default_nfv9_logging_info; +cnat_nfv9_logging_info_t *cnat_nfv9_logging_info_pool; +#define NFV9_SERVER_POOL_SIZE 16 +nfv9_server_info_t *nfv9_server_info_pool; + +u32 nfv9_src_id = 0; + +u32 +cnat_get_sys_up_time_in_ms (void) +{ + vlib_main_t * vm = vlib_get_main(); + u32 cnat_curr_time; + + cnat_curr_time = (u32)vlib_time_now (vm); + return cnat_curr_time; +} + +void +cnat_dump_time_change_logs (void) +{ + return; +} + +inline void cnat_nfv9_handle_sys_time_change(time_t current_unix_time) +{ + return; + #if 0 + cnat_handle_sys_time_change(current_unix_time); + #endif +} + +void cnat_nfv9_update_sys_time_change() +{ + cnat_nfv9_logging_info_t *my_nfv9_logging_info = NULL; + pool_foreach (my_nfv9_logging_info, cnat_nfv9_logging_info_pool, ({ + nfv9_server_info_t *server = nfv9_server_info_pool + + my_nfv9_logging_info->server_index; + server->last_template_sent_time = 0; + })); +} + +void nfv9_params_show(u32 logging_index) +{ + cnat_nfv9_logging_info_t *log_info; + if(logging_index == EMPTY) { + PLATFORM_DEBUG_PRINT("\nNetflow logging not configured\n"); + return; + } + + log_info = cnat_nfv9_logging_info_pool + logging_index; + nfv9_server_info_t *server __attribute__((unused)) + 
= nfv9_server_info_pool + log_info->server_index; + + + PLATFORM_DEBUG_PRINT("\nNetflow parameters --\n"); + PLATFORM_DEBUG_PRINT("Server index %d IPV4 address: %x, port %d, max log size %d\n", + log_info->server_index, server->ipv4_address, + server->port, log_info->max_length_minus_max_record_size); + + PLATFORM_DEBUG_PRINT("Server ref count %d Refresh rate %d timeout rate %d\n", + server->ref_count, server->refresh_rate, + server->timeout_rate); + +} + +/* + * Code to dump NFV9 packets before they are sent + */ +void +cnat_nfv9_dump_logging_context (u32 value1, + cnat_nfv9_logging_info_t *nfv9_logging_info, + u32 value2) +{ + u8 *pkt_ptr; + u32 i; + u32 next_nfv9_template_data_index = 0xffff; + u32 next_data_flow_index = 0xffff; + u32 next_data_record = 0xffff; + u32 data_record_size = 0; + vlib_main_t *vm = vlib_get_main(); + + nfv9_server_info_t *server = nfv9_server_info_pool + + nfv9_logging_info->server_index; + + vlib_cli_output(vm,"\nDumping %s packet at locn %d: time 0x%x", + (value2 == 1) ? "CURRENT" : "QUEUED", + value1, + cnat_nfv9_get_unix_time_in_seconds()); + + vlib_cli_output(vm, "\ni_vrf 0x%x, ip_address 0x%x, port %d", + nfv9_logging_info->i_vrf, + server->ipv4_address, + server->port); + + vlib_cli_output(vm,"\nseq_num %d", + server->sequence_num); + + vlib_cli_output(vm,"\nlast_template_sent time 0x%x, pkts_since_last_template %d", + server->last_template_sent_time, + server->pkts_since_last_template); + + vlib_cli_output(vm, "\npkt_len %d, add_rec_len %d, del_rec_len %d, total_rec_count %d", + nfv9_logging_info->pkt_length, + nfv9_logging_info->record_length[NAT44_ADD_RECORD], + nfv9_logging_info->record_length[NAT44_DEL_RECORD], + nfv9_logging_info->total_record_count); + + vlib_cli_output(vm,"\nbulk_add_rec_len %d, bulk_del_rec_len %d", + nfv9_logging_info->record_length[NAT44_BULK_ADD_RECORD], + nfv9_logging_info->record_length[NAT44_BULK_DEL_RECORD]); + + vlib_cli_output(vm,"\ncurr_logging_ctx 0x%p, timestamp 0x%x, queued_logging_ctx 0x%p", + nfv9_logging_info->current_logging_context, + nfv9_logging_info->current_logging_context_timestamp, + nfv9_logging_info->queued_logging_context); + + vlib_cli_output(vm,"\nnfv9_hdr 0x%p, tmpl_hdr 0x%p, dataflow_hdr 0x%p", + nfv9_logging_info->nfv9_header, + nfv9_logging_info->nfv9_template_header, + nfv9_logging_info->dataflow_header); + + vlib_cli_output(vm,"\nadd_rec 0x%p, del_rec 0x%p, next_data_ptr 0x%p", + nfv9_logging_info->record[NAT44_ADD_RECORD], + nfv9_logging_info->record[NAT44_DEL_RECORD], + nfv9_logging_info->next_data_ptr); + + vlib_cli_output(vm,"\n"); + + pkt_ptr = vlib_buffer_get_current(nfv9_logging_info->current_logging_context); + /* + * Dump along with 8 bytes of SHIM header + */ + for (i = 0; i < (nfv9_logging_info->pkt_length + CNAT_NFV9_IP_HDR_OFFSET); + i = i + 1) { + u8 c1, c2, c3; + if (i == CNAT_NFV9_IP_HDR_OFFSET) { + vlib_cli_output(vm,"\nIP_HEADER: \n"); + } else if (i == CNAT_NFV9_UDP_HDR_OFFSET) { + vlib_cli_output(vm,"\nUDP_HEADER: \n"); + } else if (i == CNAT_NFV9_HDR_OFFSET) { + vlib_cli_output(vm,"\nNFV9 Header: Version:Count: \n"); + } else if (i == (CNAT_NFV9_HDR_OFFSET+4)) { + vlib_cli_output(vm,"\nBoot_Up_Time_In_ms: \n"); + } else if (i == (CNAT_NFV9_HDR_OFFSET+8)) { + vlib_cli_output(vm, "\nUNIX_Time: \n"); + } else if (i == (CNAT_NFV9_HDR_OFFSET+12)) { + vlib_cli_output(vm,"\nSeq_Num: \n"); + } else if (i == (CNAT_NFV9_HDR_OFFSET+16)) { + vlib_cli_output(vm,"\nSource ID: \n"); + } else if (i == (CNAT_NFV9_HDR_OFFSET+20)) { + if (nfv9_logging_info->nfv9_template_header) { + 
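+            /*
+             * The 20-byte NFV9 export header (version/count, sys-uptime,
+             * unix time, sequence number, source id) ends here. A template
+             * flowset is being sent in this packet, so its flowset header
+             * starts at this offset and the template data records follow
+             * four bytes later.
+             */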
vlib_cli_output(vm,"\nNFV9 TEMPLATE HDR: \n"); + next_nfv9_template_data_index = i + 4; + } else { + next_data_flow_index = i; + } + } else if (i == (CNAT_NFV9_TEMPLATE_OFFSET+CNAT_NFV9_TEMPLATE_LENGTH)) { + if (nfv9_logging_info->nfv9_template_header) { + next_data_flow_index = i; + } + } + + if (i == next_nfv9_template_data_index) { + vlib_cli_output(vm,"\nNFV9 TEMPLATE DATA: \n"); + } else if (i == next_data_flow_index) { + if (*(pkt_ptr + i) == 0x01) { + if (*(pkt_ptr + i + 1) == 0x00) { + data_record_size = 21; + next_data_record = i + 4; + next_data_flow_index = i + *(pkt_ptr + i + 3) + + *(pkt_ptr + i + 2)*0x100; + vlib_cli_output(vm,"\nADD_RECORD (total %d): next_data_flow_index (%d->%d)\n", (next_data_flow_index - i), i, next_data_flow_index); + } else if (*(pkt_ptr + i + 1) == 0x01) { + data_record_size = 11; + next_data_record = i + 4; + next_data_flow_index = i + *(pkt_ptr + i + 3) + + *(pkt_ptr + i + 2)*0x100; + vlib_cli_output(vm,"\nDEL_RECORD (total %d) : next_data_flow_index (%d->%d)\n", (next_data_flow_index - i), i, next_data_flow_index); + } else if (*(pkt_ptr + i + 1) == 0x09) { + data_record_size = 20; + next_data_record = i + 4; + next_data_flow_index = i + *(pkt_ptr + i + 3) + + *(pkt_ptr + i + 2)*0x100; + vlib_cli_output(vm,"\nBULK_ADD_RECORD (total %d) : next_data_flow_index (%d->%d)\n", (next_data_flow_index - i), i, next_data_flow_index); + } else if (*(pkt_ptr + i + 1) == 0x0a) { + data_record_size = 10; + next_data_record = i + 4; + next_data_flow_index = i + *(pkt_ptr + i + 3) + + *(pkt_ptr + i + 2)*0x100; + vlib_cli_output(vm,"\nBULK_DEL_RECORD (total %d) : next_data_flow_index (%d->%d)\n", (next_data_flow_index - i), i, next_data_flow_index); + } + + } + } else if (i == next_data_record) { + vlib_cli_output(vm,"\n"); + next_data_record += data_record_size; + } + + c3 = *(pkt_ptr + i); + + c2 = c3 & 0xf; + c1 = (c3 >> 4) & 0xf; + + + vlib_cli_output(vm,"%c%c ", + ((c1 <= 9) ? (c1 + '0') : (c1 - 10 + 'a')), + ((c2 <= 9) ? 
(c2 + '0') : (c2 - 10 + 'a')));
+
+    }
+    vlib_cli_output(vm,"\n");
+}
+
+/*
+ * edt: * * cnat_nfv9_pad_added_to_an_addr
+ *
+ * Returns the difference (number of bytes) between new_addr
+ * and org_addr
+ *
+ * Argument: u8 *new_addr, u8 *org_addr
+ * returns the difference
+ */
+inline
+int cnat_nfv9_pad_added_to_an_addr(u8 *new_addr, u8 *org_addr)
+{
+    uword addr1 = (uword) new_addr;
+    uword addr2 = (uword) org_addr;
+    return (addr1 - addr2);
+}
+
+/*
+ * edt: * * cnat_nfv9_add_end_of_record_padding
+ *
+ * Tries to add padding to data_ptr to ensure it is word aligned
+ *
+ * Argument: u8 * data_ptr
+ * pointer to the data pointer
+ */
+inline
+u8 *cnat_nfv9_add_end_of_record_padding (u8 *data_ptr)
+{
+    uword tmp = (uword) data_ptr;
+    uword pad_value = (uword) NFV9_PAD_VALUE;
+
+    tmp = (tmp + pad_value) & (~pad_value);
+
+    return ((u8 *) tmp);
+}
+
+/*
+ * edt: * * cnat_nfv9_pad_end_of_record_length
+ *
+ * Rounds record_length up so that it is word aligned
+ *
+ * Argument: u16 record_length
+ * length of the record to be padded
+ */
+inline
+u16 cnat_nfv9_pad_end_of_record_length (u16 record_length)
+{
+    u16 pad_value = NFV9_PAD_VALUE;
+
+    return ((record_length + pad_value) & (~pad_value));
+}
+
+/* get first interface address */
+static ip4_address_t *
+ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index)
+{
+    ip_lookup_main_t * lm = &im->lookup_main;
+    ip_interface_address_t * ia = 0;
+    ip4_address_t * result = 0;
+
+    foreach_ip_interface_address (lm, ia, sw_if_index,
+                                  1 /* honor unnumbered */,
+    ({
+        ip4_address_t * a = ip_interface_address_get_address (lm, ia);
+        result = a;
+        break;
+    }));
+    return result;
+}
+
+void fill_ip_n_udp_hdr (u32 ipv4_addr, u16 port,
+                        cnat_nfv9_logging_info_t *nfv9_logging_info)
+{
+    vlib_buffer_t * b0 = nfv9_logging_info->current_logging_context;
+    ipv4_header *ip_header = vlib_buffer_get_current(b0);
+    udp_hdr_type_t *udp_header = (udp_hdr_type_t *)((u8*)ip_header + sizeof(ipv4_header));
+    vlib_main_t *vm = vlib_get_main();
+    u16 ip_length __attribute__((unused));
+    u16 pkt_len = nfv9_logging_info->pkt_length;
+    ip4_address_t *ia0 = 0;
+    u16 src_port = 0x0a0a;
+
+    /*
+     * Clear the SHIM header fields. The PD nodes will set them
+     * appropriately.
+     */
+    PLATFORM_MEMSET_CNAT_LOG_PKT_DATA
+
+    /*
+     * Don't need a special define for 0x45 - IP version and hdr len
+     */
+    ip_header->version_hdr_len_words = 0x45;
+    ip_header->tos = 0;
+    ip_header->frag_flags_offset = 0;
+    ip_header->ttl = 0xff;
+    ip_header->protocol = UDP_PROT;
+    ip_header->dest_addr = clib_host_to_net_u32(ipv4_addr);
+    ip_length = vlib_buffer_length_in_chain (vm, b0);
+    ip_header->total_len_bytes = clib_host_to_net_u16(pkt_len);
+    ia0 = ip4_interface_first_address(&ip4_main, nfv9_logging_info->i_vrf_id);
+    ip_header->src_addr = ia0->as_u32;
+    udp_header->src_port = clib_host_to_net_u16(src_port);
+    udp_header->dest_port = clib_host_to_net_u16(port);
+    udp_header->udp_checksum = 0;
+    udp_header->udp_length =
+        clib_host_to_net_u16(pkt_len - sizeof(ipv4_header));
+    ip_header->checksum = ip4_header_checksum((ip4_header_t *)ip_header);
+}
+
+/*
+ * edt: * * cnat_nfv9_fill_nfv9_ip_header
+ *
+ * Tries to fill the fields of the IP header before it
+ * is sent to the L3 infra node.
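+ *
+ * Along the way it sets the NFV9 header record count, pads the last
+ * add/delete record out to a 4-byte boundary (zeroing the pad bytes
+ * and growing pkt_length to match), and fills in the outer IPv4/UDP
+ * headers for the collector via fill_ip_n_udp_hdr().
+ *
+ * A worked example of the padding math, assuming NFV9_PAD_VALUE is 3
+ * (i.e. 4-byte alignment): a record length of 21 rounds up to
+ * (21 + 3) & ~3 = 24, and a length of 11 rounds up to 12.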
+ * + * Argument: cnat_nfv9_logging_info_t *nfv9_logging_info + * structure that contains the packet context + */ +inline +void cnat_nfv9_fill_nfv9_ip_header (cnat_nfv9_logging_info_t *nfv9_logging_info) +{ + u16 new_record_length = 0; + u16 orig_record_length = 0; + vlib_buffer_t * b0 = nfv9_logging_info->current_logging_context; + + /* + * Fill in the IP header and port number of the Netflow collector + * The L3 Infra node will fill in the rest of the fields + */ + + nfv9_logging_info->nfv9_header->count = + clib_host_to_net_u16(nfv9_logging_info->total_record_count); + + /* + * Pad the last add/del record to ensure multiple of 4 bytes + */ + + if(nfv9_logging_info->last_record != RECORD_INVALID) { + + orig_record_length = + nfv9_logging_info->record_length[nfv9_logging_info->last_record]; + + new_record_length = cnat_nfv9_pad_end_of_record_length( + orig_record_length); + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u16(new_record_length); + } + + /* + * If the record is padded, ensure the padded bytes are ZERO + */ + if (PREDICT_TRUE(new_record_length - orig_record_length)) { + u8 *pkt_ptr = (u8 *) (b0 + nfv9_logging_info->pkt_length); + + /* + * Blindly copy 3 bytes of data to Zero to avoid for loops + * We have sufficient padding bytes for safety and we won't + * go over buffer limits + */ + *pkt_ptr++ = 0; + *pkt_ptr++ = 0; + *pkt_ptr++ = 0; + + nfv9_logging_info->pkt_length += + (new_record_length - orig_record_length); + } + nfv9_server_info_t *server = nfv9_server_info_pool + + nfv9_logging_info->server_index; + fill_ip_n_udp_hdr(server->ipv4_address, + server->port, nfv9_logging_info); + /* + * It is important to set the sw_if_index for the new buffer create + */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0; + +} + +/* + * edt: * * cnat_nfv9_send_queued_pkt + * + * Tries to send a logging pkt that has been queued earlier + * because it could not be sent due to downstream constipation + * + * Argument: cnat_nfv9_logging_info_t *nfv9_logging_info + * structure that contains the packet context + */ +inline +void cnat_nfv9_send_queued_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) +{ +} + +/* + * edt: * * cnat_nfv9_send_pkt + * + * Tries to send a logging pkt. If the packet cannot be sent + * because of rewrite_output node cannot process it, queue + * it temporarily and try to send it later. + * + * Argument: cnat_nfv9_logging_info_t *nfv9_logging_info + * structure that contains the packet context + */ +inline +void cnat_nfv9_send_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) +{ + cnat_nfv9_fill_nfv9_ip_header(nfv9_logging_info); + + nfv9_server_info_t *server = nfv9_server_info_pool + + nfv9_logging_info->server_index; + + /* Update sequence number just before sending. + * So that, multiple NAT44/NAT64/DSLite instances sharing a + * a single server instance can stamp the sequence number + * in the right sequence (as seen by the server). 
+ */ + server->sequence_num += 1; + nfv9_logging_info->nfv9_header->sequence_num = + clib_host_to_net_u32(server->sequence_num); + +#if DEBUG + cnat_nfv9_dump_logging_context (2, nfv9_logging_info, 1); +#endif +#if 0 /* commented out below */ + send_vpp3_nfv9_pkt(nfv9_logging_info); +#endif + nfv9_logging_info->current_logging_context = NULL; + /* + * Increase last packet sent count + */ + server->pkts_since_last_template++; + + /* + * If we are sending an nfv9 tempate with this packet + * log this timestamp + */ + if (nfv9_logging_info->nfv9_template_header) { + server->last_template_sent_time = + cnat_nfv9_get_unix_time_in_seconds(); + server->pkts_since_last_template = 0; + } + + return; +} + +/* + * send_vpp3_nfv9_pkt: to send multiple b0 in a frame + */ +#if 0 +inline void send_vpp3_nfv9_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) +{ + vlib_main_t *vm = vlib_get_main(); + vlib_frame_t *f; + vlib_buffer_t *b0; + u32 ip4_input_node_index; + //u32 * to_next, * from, bi0 =0; + u32 bi0 =0; + ipv4_header * h0, *ip; + static u32 * buffers; + u32 nalloc; + udp_header_t * udp; + u16 udp_length, ip_length; + + + ip4_input_node_index = nfv9_logging_info->ip4_input_node_index; + f = nfv9_logging_info->f; + if (f == NULL) { + nfv9_logging_info->f = vlib_get_frame_to_node(vm, ip4_input_node_index); + f = nfv9_logging_info->f; + f->n_vectors = 0; + nfv9_logging_info->to_next = vlib_frame_vector_args (f); + } + /* Build a pkt from whole cloth */ + b0 = nfv9_logging_info->current_logging_context; + //to_next = nfv9_logging_info->to_next; + ip = vlib_buffer_get_current (b0); + //if (PREDICT_TRUE(f->n_vectors < VLIB_FRAME_SIZE)) { + if (PREDICT_TRUE(f->n_vectors < 5)) { + + b0->current_length = clib_net_to_host_u16(ip->total_len_bytes); + bi0 = vlib_get_buffer_index (vm, b0); + nfv9_logging_info->to_next[0] = bi0; + printf("f->n_vec %d f %p to_next %p val %d b0 %p\n", + f->n_vectors, f, nfv9_logging_info->to_next, + nfv9_logging_info->to_next[0], b0); + //to_next++; + nfv9_logging_info->to_next++; // = to_next; + f->n_vectors++; + } + + //if (f->n_vectors == VLIB_FRAME_SIZE) + if (f->n_vectors == 5) { + printf("sending pkt on 256\n"); + printf("%s: total_len_bytes %d bi %d nfv9_logging_info->pkt_length %d index %d\n", + __func__, clib_net_to_host_u16(ip->total_len_bytes), + bi0, nfv9_logging_info->pkt_length, ip4_input_node_index); + vlib_put_frame_to_node(vm, ip4_input_node_index, f); + nfv9_logging_info->f = NULL; + nfv9_logging_info->to_next = NULL; + } + return; +} +#endif +/* + * send_vpp3_nfv9_pkt: to send one b0 in a frame + */ +inline void send_vpp3_nfv9_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) +{ + vlib_node_t *output_node; + vlib_main_t *vm = vlib_get_main(); + vlib_frame_t *f; + vlib_buffer_t *b0; + u32 *to_next; + u32 bi=0; + ipv4_header *ip; + + //Lets check and send it to ip4-lookup node + output_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup"); + f = vlib_get_frame_to_node (vm, output_node->index); + + if ( nfv9_logging_info->current_logging_context != NULL) { + /* Build a pkt from whole cloth */ + b0 = nfv9_logging_info->current_logging_context; + ip = vlib_buffer_get_current(b0); + to_next = vlib_frame_vector_args (f); + bi = vlib_get_buffer_index (vm, b0); + to_next[0] = bi; + + f->n_vectors = 1; + b0->current_length = clib_net_to_host_u16(ip->total_len_bytes); + vlib_put_frame_to_node (vm, output_node->index, f); + } + return; +} +/* + * edt: * * cnat_nfv9_send_pkt_always_success + * + * Tries to send a logging pkt. 
This cannot fail due to downstream + * constipation because we have already checked if the rewrite_output + * node can accept it. + * + * Argument: cnat_nfv9_logging_info_t *nfv9_logging_info + * structure that contains the packet context + * + * Argument: vlib_node_t *output_node + * vlib_node_t structure for rewrite_output node + */ +inline +void cnat_nfv9_send_pkt_always_success ( + cnat_nfv9_logging_info_t *nfv9_logging_info, + vlib_node_t *output_node) +{ + nfv9_server_info_t *server = nfv9_server_info_pool + + nfv9_logging_info->server_index; + vlib_main_t * vm = vlib_get_main(); + + /* + * At this point we either have a current or queued logging context + */ + if (PREDICT_TRUE(nfv9_logging_info->current_logging_context != NULL)) { + server->sequence_num += 1; + nfv9_logging_info->nfv9_header->sequence_num = + clib_host_to_net_u32(server->sequence_num); + cnat_nfv9_fill_nfv9_ip_header(nfv9_logging_info); + + nfv9_logging_info->current_logging_context->current_length = + nfv9_logging_info->pkt_length; + vlib_cli_output(vm, "\nNFV9: 3. Sending Current packet\n"); +#if DEBUG + cnat_nfv9_dump_logging_context (3, nfv9_logging_info, 1); +#endif + send_vpp3_nfv9_pkt(nfv9_logging_info); + nfv9_logging_info->current_logging_context = NULL; + } else { + /* + * For queued logging context, nfv9_header-> count is already set + */ + nfv9_logging_info->queued_logging_context->current_length = + nfv9_logging_info->pkt_length; + vlib_cli_output(vm,"\nNFV9: 4. Sending Queued packet\n"); +#if DEBUG + cnat_nfv9_dump_logging_context (4, nfv9_logging_info, 2); +#endif + send_vpp3_nfv9_pkt(nfv9_logging_info); + + nfv9_logging_info->queued_logging_context = NULL; + } + + /* + * NF Logging info already deleted, just free it and return + */ + if (PREDICT_FALSE(nfv9_logging_info->deleted)) { + pool_put(cnat_nfv9_logging_info_pool, nfv9_logging_info); + return; + } + + /* + * Increase last packet sent count and timestamp + */ + server->pkts_since_last_template++; + + /* + * If we are sending an nfv9 tempate with this packet + * log this timestamp + */ + if (nfv9_logging_info->nfv9_template_header) { + server->last_template_sent_time = + cnat_nfv9_get_unix_time_in_seconds(); + server->pkts_since_last_template = 0; + } +} + +/* + * edt: * * cnat_nfv9_create_logging_context + * + * Tries to create a logging context with packet buffer + * to send a new logging packet + * + * Argument: cnat_nfv9_logging_info_t *nfv9_logging_info + * structure that contains the nfv9 logging info and will store + * the packet context as well. + */ +inline +void cnat_nfv9_create_logging_context ( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_nfv9_template_add_flag_t template_flag) +{ + vlib_main_t *vm = vlib_get_main(); + vlib_buffer_t *b0; + static u32 bi; + u8 i; + + /* + * If queued_logging_context_index is non-EMPTY, we already have a logging + * packet queued to be sent. First try sending this before allocating + * a new context. 
We can have only one active packet context per + * nfv9_logging_info structure + */ + if (PREDICT_FALSE(nfv9_logging_info->queued_logging_context != NULL)) { + cnat_nfv9_send_queued_pkt(nfv9_logging_info); + /* + * If we cannot still send the queued pkt, just return + * Downstream Constipation count would have increased anyway + */ + if (nfv9_logging_info->queued_logging_context != NULL) { + cnat_global_counters.nfv9_logging_context_creation_deferred_count++; + return; + } + } + + + /* + * No context can be allocated, return silently + * calling routine will handle updating the error counters + */ + if (vlib_buffer_alloc (vm, &bi, 1) != 1) { + vlib_cli_output(vm, "buffer allocation failure"); + return; + } + /* Build a pkt from whole cloth */ + b0 = vlib_get_buffer (vm, bi); + b0->current_data = 0; + + nfv9_server_info_t *server = nfv9_server_info_pool + + nfv9_logging_info->server_index; + + nfv9_logging_info->current_logging_context = b0; + nfv9_logging_info->current_logging_context_timestamp = + cnat_nfv9_get_sys_up_time_in_ms(); + + + nfv9_logging_info->nfv9_header = + (nfv9_header_t *) (vlib_buffer_get_current(b0) + + (sizeof(ipv4_header)) + + (sizeof(udp_hdr_type_t))); + + nfv9_logging_info->nfv9_header->version = + clib_host_to_net_u16(CNAT_NFV9_VERSION_NUMBER); + + nfv9_logging_info->nfv9_header->sys_up_time = + clib_host_to_net_u32(cnat_nfv9_get_sys_up_time_in_ms()); + + nfv9_logging_info->nfv9_header->timestamp = + clib_host_to_net_u32(cnat_nfv9_get_unix_time_in_seconds()); + + + nfv9_logging_info->nfv9_header->source_id = + clib_host_to_net_u32(nfv9_src_id); + + nfv9_logging_info->dataflow_header = 0; + + for(i = 0; i < MAX_RECORDS;i++) { + nfv9_logging_info->record[i] = NULL; + nfv9_logging_info->record_length[i] = 0; + } + nfv9_logging_info->last_record = 0; + + + nfv9_logging_info->nfv9_template_header = 0; + nfv9_logging_info->next_data_ptr = + (u8 *) (vlib_buffer_get_current(b0) + + sizeof(ipv4_header) + sizeof(udp_hdr_type_t) + + sizeof(nfv9_header_t)); + + nfv9_logging_info->pkt_length = (CNAT_NFV9_TEMPLATE_OFFSET - + CNAT_NFV9_IP_HDR_OFFSET); + + + /* + * Now we have 0 records to start with + */ + + nfv9_logging_info->total_record_count = 0; + + if ((template_flag == cnat_nfv9_template_add_always) || + (server->pkts_since_last_template > + server->refresh_rate) || + ((cnat_nfv9_get_unix_time_in_seconds() - + server->last_template_sent_time) > + server->timeout_rate)) { + + /* + * Send a new template + */ + nfv9_logging_info->nfv9_template_header = + (cnat_nfv9_template_t *) nfv9_logging_info->next_data_ptr; + + memcpy(nfv9_logging_info->nfv9_template_header, + &cnat_nfv9_template_info, + sizeof(cnat_nfv9_template_info)); + + /* + * Templates are sent irrespective of particular service-type config + */ + nfv9_logging_info->total_record_count = MAX_RECORDS - 1; + + nfv9_logging_info->pkt_length += CNAT_NFV9_TEMPLATE_LENGTH; + + /* + * Set the data pointer beyond the template field + */ + nfv9_logging_info->next_data_ptr = + (u8 *) (nfv9_logging_info->nfv9_template_header + 1); + /* + * Setting template_sent flag as TRUE. 
this will be checked in + * handle_vrfid_name_mapping() + */ + server->template_sent = TEMPLATE_SENT_TRUE; + } +} + +inline +void cnat_nfv9_record_create ( + cnat_nfv9_logging_info_t *nfv9_logging_info, u16 cur_record) +{ + int byte_diff = 0; + u16 last_record = nfv9_logging_info->last_record; + + if(last_record != 0 && last_record != cur_record) { + u16 orig_length, new_length; + + orig_length = nfv9_logging_info->record_length[last_record]; + new_length = cnat_nfv9_pad_end_of_record_length(orig_length); + + /* + * The padding bytes are required after the last record + * Ensure length of last record accounts for padding bytes + */ + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u16(new_length); + + /* + * Since we are working on the del record, set add record to 0 + */ + nfv9_logging_info->record[last_record] = 0; + + nfv9_logging_info->record_length[last_record] = 0; + + nfv9_logging_info->last_record = 0; + } + + nfv9_logging_info->last_record = cur_record; + + /* + * The padding bytes are required after the last record + * Ensure that we skip over the padding bytes + */ + nfv9_logging_info->dataflow_header = (nfv9_dataflow_record_header_t *) + cnat_nfv9_add_end_of_record_padding(nfv9_logging_info->next_data_ptr); + /* + * Get the difference + */ + byte_diff = cnat_nfv9_pad_added_to_an_addr( + (u8 *)nfv9_logging_info->dataflow_header, + nfv9_logging_info->next_data_ptr); + if(byte_diff > 0) { + /* + * Update the packet length to account for the pad bytes + */ + nfv9_logging_info->pkt_length += byte_diff; + u8 *pkt_ptr = nfv9_logging_info->next_data_ptr; + + /* + * Blindly copy 3 bytes of data to Zero to avoid for loops + * We have sufficient padding bytes for safety and we won't + * go over buffer limits + */ + *pkt_ptr++ = 0; + *pkt_ptr++ = 0; + *pkt_ptr++ = 0; + } + /* + * Initialize the template_id and the length of the add record + */ + nfv9_logging_info->dataflow_header->dataflow_template_id = + clib_host_to_net_u16(cnat_template_id[cur_record]); + + nfv9_logging_info->record[cur_record] = + (u8 *) (nfv9_logging_info->dataflow_header + 1); + + nfv9_logging_info->record_length[cur_record] = + CNAT_NFV9_DATAFLOW_RECORD_HEADER_LENGTH; + + /* + * Update the length of the total NFV9 record + */ + nfv9_logging_info->pkt_length += + CNAT_NFV9_DATAFLOW_RECORD_HEADER_LENGTH; + + /* + * Set the data pointer beyond the dataflow header field + */ + nfv9_logging_info->next_data_ptr = + (u8 *) (nfv9_logging_info->dataflow_header + 1); + +} + +static void cnat_nfv9_insert_add_record( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap) +{ + u16 my_proto_mask; + u8 my_protocol; + nfv9_add_record_t nfv9_logging_add_record; + if (PREDICT_FALSE(nfv9_logging_info->record[NAT44_ADD_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, NAT44_ADD_RECORD); + } + + /* + * We should definitely have add_record now, no need to sanitize + */ + + nfv9_logging_add_record.inside_vrf_id = + clib_host_to_net_u32(vrfmap->i_vrf_id); + + nfv9_logging_add_record.outside_vrf_id = + clib_host_to_net_u32(vrfmap->o_vrf_id); + + nfv9_logging_add_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + nfv9_logging_add_record.outside_ip_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + + nfv9_logging_add_record.inside_ip_port = + clib_host_to_net_u16(db->in2out_key.k.port); + nfv9_logging_add_record.outside_ip_port = + clib_host_to_net_u16(db->out2in_key.k.port); + + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; 
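+    /*
+     * The in2out key overloads its vrf field: the CNAT_PRO_MASK bits
+     * carry the L4 protocol. Map that back to the protocol value the
+     * NFV9 record expects, with GRE as the fall-through for anything
+     * that is not UDP, TCP or ICMP.
+     */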
+ my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? ICMP_PROT : GRE_PROT))); + + nfv9_logging_add_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[NAT44_ADD_RECORD], + &nfv9_logging_add_record, CNAT_NFV9_ADD_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT44_ADD_RECORD] + += CNAT_NFV9_ADD_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_ADD_RECORD_LENGTH; + + nfv9_logging_info->record[NAT44_ADD_RECORD] + += CNAT_NFV9_ADD_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT44_ADD_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT44_ADD_RECORD]); + +} + + +static void cnat_nfv9_ds_lite_insert_add_record( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry) +{ + + nfv9_ds_lite_add_record_t nfv9_logging_add_record = {0}; + cnat_user_db_entry_t *udb = NULL; + u16 my_proto_mask; + u8 my_protocol; + + udb = cnat_user_db + db->user_index; + if (PREDICT_FALSE(!udb)) { + return; + } + if (PREDICT_FALSE(nfv9_logging_info->record[DS_LITE_ADD_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, DS_LITE_ADD_RECORD); + } + /* + * We should definitely have add_record now, no need to sanitize + */ + nfv9_logging_add_record.inside_vrf_id = + clib_host_to_net_u32(dslite_entry->i_vrf_id); + nfv9_logging_add_record.outside_vrf_id = + clib_host_to_net_u32(dslite_entry->o_vrf_id); + +#ifdef DSLITE_USER_IPV4 + nfv9_logging_add_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); +#else + /* + * Inside ipv4 address is sent as 0.0.0.0 for ds-lite case as + * ipv6 is user here. + */ + nfv9_logging_add_record.inside_ip_addr = 0; +#endif + + nfv9_logging_add_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(udb->ipv6[0]); + nfv9_logging_add_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(udb->ipv6[1]); + nfv9_logging_add_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(udb->ipv6[2]); + nfv9_logging_add_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(udb->ipv6[3]); + + nfv9_logging_add_record.outside_ip_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + + nfv9_logging_add_record.inside_ip_port = + clib_host_to_net_u16(db->in2out_key.k.port); + nfv9_logging_add_record.outside_ip_port = + clib_host_to_net_u16(db->out2in_key.k.port); + + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? 
ICMP_PROT : 0))); + nfv9_logging_add_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[DS_LITE_ADD_RECORD], + &nfv9_logging_add_record, CNAT_NFV9_DS_LITE_ADD_RECORD_LENGTH); + + nfv9_logging_info->record_length[DS_LITE_ADD_RECORD] + += CNAT_NFV9_DS_LITE_ADD_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_DS_LITE_ADD_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[DS_LITE_ADD_RECORD] + += CNAT_NFV9_DS_LITE_ADD_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[DS_LITE_ADD_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[DS_LITE_ADD_RECORD]); +} + + +static void cnat_nfv9_ds_lite_insert_del_record( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry) +{ + + nfv9_ds_lite_del_record_t nfv9_logging_del_record = {0}; + cnat_user_db_entry_t *udb = NULL; + u16 my_proto_mask; + u8 my_protocol; + + udb = cnat_user_db + db->user_index; + if (PREDICT_FALSE(!udb)) { + return; + } + if (PREDICT_FALSE(nfv9_logging_info->record[DS_LITE_DEL_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, DS_LITE_DEL_RECORD); + } + /* + * We should definitely have a del record now. + * No need to sanitize + */ + nfv9_logging_del_record.inside_vrf_id = + clib_host_to_net_u32(dslite_entry->i_vrf_id); + +#ifdef DSLITE_USER_IPV4 + nfv9_logging_del_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); +#else + /* + * Inside ipv4 address is sent as 0.0.0.0 for ds-lite case as + * ipv6 is user here. + */ + nfv9_logging_del_record.inside_ip_addr = 0; +#endif + + nfv9_logging_del_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(udb->ipv6[0]); + nfv9_logging_del_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(udb->ipv6[1]); + nfv9_logging_del_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(udb->ipv6[2]); + nfv9_logging_del_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(udb->ipv6[3]); + + nfv9_logging_del_record.inside_ip_port = + clib_host_to_net_u16(db->in2out_key.k.port); + + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? 
ICMP_PROT : 0))); + nfv9_logging_del_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[DS_LITE_DEL_RECORD], + &nfv9_logging_del_record, CNAT_NFV9_DS_LITE_DEL_RECORD_LENGTH); + + nfv9_logging_info->record_length[DS_LITE_DEL_RECORD] += + CNAT_NFV9_DS_LITE_DEL_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_DS_LITE_DEL_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[DS_LITE_DEL_RECORD] + += CNAT_NFV9_DS_LITE_DEL_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[DS_LITE_DEL_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[DS_LITE_DEL_RECORD]); +} + +#ifndef NO_BULK_LOGGING +static void cnat_nfv9_insert_bulk_add_record( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap, + int bulk_alloc_start_port) +{ + nfv9_bulk_add_record_t nfv9_logging_bulk_add_record; + bulk_alloc_size_t bulk_size = BULKSIZE_FROM_VRFMAP(vrfmap); + if (PREDICT_FALSE(nfv9_logging_info->record[NAT44_BULK_ADD_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, NAT44_BULK_ADD_RECORD); + } + + /* + * We should definitely have add_record now, no need to sanitize + */ + + nfv9_logging_bulk_add_record.inside_vrf_id = + clib_host_to_net_u32(vrfmap->i_vrf_id); + nfv9_logging_bulk_add_record.outside_vrf_id = + clib_host_to_net_u32(vrfmap->o_vrf_id); + + nfv9_logging_bulk_add_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + nfv9_logging_bulk_add_record.outside_ip_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + + nfv9_logging_bulk_add_record.outside_ip_port_start = + clib_host_to_net_u16(bulk_alloc_start_port); + nfv9_logging_bulk_add_record.outside_ip_port_end = + clib_host_to_net_u16(bulk_alloc_start_port + bulk_size -1); + + memcpy(nfv9_logging_info->record[NAT44_BULK_ADD_RECORD], + &nfv9_logging_bulk_add_record, CNAT_NFV9_BULK_ADD_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT44_BULK_ADD_RECORD] + += CNAT_NFV9_BULK_ADD_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_BULK_ADD_RECORD_LENGTH; + + nfv9_logging_info->record[NAT44_BULK_ADD_RECORD] + += CNAT_NFV9_BULK_ADD_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT44_BULK_ADD_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT44_BULK_ADD_RECORD]); + +} + + +static void cnat_nfv9_ds_lite_insert_bulk_add_record( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, + int bulk_alloc_start_port) +{ + + nfv9_ds_lite_bulk_add_record_t nfv9_logging_bulk_add_record = {0}; + cnat_user_db_entry_t *udb = NULL; + bulk_alloc_size_t bulk_size = BULKSIZE_FROM_VRFMAP(dslite_entry); + + if (PREDICT_FALSE(nfv9_logging_info->record[DS_LITE_BULK_ADD_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, DS_LITE_BULK_ADD_RECORD); + } + udb = cnat_user_db + db->user_index; + if (PREDICT_FALSE(!udb)) { + return; + } + /* + * We should definitely have add_record now, no need to sanitize + */ + + nfv9_logging_bulk_add_record.inside_vrf_id = + clib_host_to_net_u32(dslite_entry->i_vrf_id); + nfv9_logging_bulk_add_record.outside_vrf_id = + clib_host_to_net_u32(dslite_entry->o_vrf_id); + +#ifdef DSLITE_USER_IPV4 + nfv9_logging_bulk_add_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); +#else + /* + * Inside ipv4 address 
is sent as 0.0.0.0 for ds-lite case as + * ipv6 is user here. + */ + nfv9_logging_bulk_add_record.inside_ip_addr = 0; +#endif + + nfv9_logging_bulk_add_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(udb->ipv6[0]); + nfv9_logging_bulk_add_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(udb->ipv6[1]); + nfv9_logging_bulk_add_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(udb->ipv6[2]); + nfv9_logging_bulk_add_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(udb->ipv6[3]); + + nfv9_logging_bulk_add_record.outside_ip_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + + nfv9_logging_bulk_add_record.outside_ip_port_start = + clib_host_to_net_u16(bulk_alloc_start_port); + nfv9_logging_bulk_add_record.outside_ip_port_end = + clib_host_to_net_u16(bulk_alloc_start_port + bulk_size -1); + + memcpy(nfv9_logging_info->record[DS_LITE_BULK_ADD_RECORD], + &nfv9_logging_bulk_add_record, CNAT_NFV9_DS_LITE_BULK_ADD_RECORD_LENGTH); + + nfv9_logging_info->record_length[DS_LITE_BULK_ADD_RECORD] + += CNAT_NFV9_DS_LITE_BULK_ADD_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_DS_LITE_BULK_ADD_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + nfv9_logging_info->record[DS_LITE_BULK_ADD_RECORD] + += CNAT_NFV9_DS_LITE_BULK_ADD_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[DS_LITE_BULK_ADD_RECORD]; + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[DS_LITE_BULK_ADD_RECORD]); +} + + +static void cnat_nfv9_ds_lite_insert_bulk_del_record( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, + int bulk_alloc_start_port) +{ + + nfv9_ds_lite_bulk_del_record_t nfv9_logging_bulk_del_record = {0}; + cnat_user_db_entry_t *udb = NULL; + + if (PREDICT_FALSE(nfv9_logging_info->record[DS_LITE_BULK_DEL_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, DS_LITE_BULK_DEL_RECORD); + } + udb = cnat_user_db + db->user_index; + if (PREDICT_FALSE(!udb)) { + return; + } + /* + * We should definitely have add_record now, no need to sanitize + */ + + nfv9_logging_bulk_del_record.inside_vrf_id = + clib_host_to_net_u32(dslite_entry->i_vrf_id); + +#ifdef DSLITE_USER_IPV4 + nfv9_logging_bulk_del_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); +#else + nfv9_logging_bulk_del_record.inside_ip_addr = + clib_host_to_net_u32(0); +#endif + + nfv9_logging_bulk_del_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(udb->ipv6[0]); + nfv9_logging_bulk_del_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(udb->ipv6[1]); + nfv9_logging_bulk_del_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(udb->ipv6[2]); + nfv9_logging_bulk_del_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(udb->ipv6[3]); + + nfv9_logging_bulk_del_record.outside_ip_port_start = + clib_host_to_net_u16(bulk_alloc_start_port); + + memcpy(nfv9_logging_info->record[DS_LITE_BULK_DEL_RECORD], + &nfv9_logging_bulk_del_record, + CNAT_NFV9_DS_LITE_BULK_DEL_RECORD_LENGTH); + nfv9_logging_info->record_length[DS_LITE_BULK_DEL_RECORD] += + CNAT_NFV9_DS_LITE_BULK_DEL_RECORD_LENGTH; + nfv9_logging_info->pkt_length += + CNAT_NFV9_DS_LITE_BULK_DEL_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + nfv9_logging_info->record[DS_LITE_BULK_DEL_RECORD] += + CNAT_NFV9_DS_LITE_BULK_DEL_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[DS_LITE_BULK_DEL_RECORD]; + 
nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[DS_LITE_BULK_DEL_RECORD]); +} +#endif /* #ifndef NO_BULK_LOGGING */ + +static void cnat_nfv9_insert_del_record( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap) +{ + u16 my_proto_mask; + u8 my_protocol; + nfv9_del_record_t nfv9_logging_del_record; + + if (PREDICT_FALSE(nfv9_logging_info->record[NAT44_DEL_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, NAT44_DEL_RECORD); + } + + /* + * We should definitely have add_record now, no need to sanitize + */ + + nfv9_logging_del_record.inside_vrf_id = + clib_host_to_net_u32(vrfmap->i_vrf_id); + + nfv9_logging_del_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + + nfv9_logging_del_record.inside_ip_port = + clib_host_to_net_u16(db->in2out_key.k.port); + + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? ICMP_PROT : GRE_PROT))); + + nfv9_logging_del_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[NAT44_DEL_RECORD], + &nfv9_logging_del_record, CNAT_NFV9_DEL_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT44_DEL_RECORD] + += CNAT_NFV9_DEL_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_DEL_RECORD_LENGTH; + + nfv9_logging_info->record[NAT44_DEL_RECORD] + += CNAT_NFV9_DEL_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT44_DEL_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT44_DEL_RECORD]); + +} + +#ifndef NO_BULK_LOGGING +static void cnat_nfv9_insert_bulk_del_record( + cnat_nfv9_logging_info_t *nfv9_logging_info, + cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap, + int bulk_alloc_start_port) +{ + nfv9_bulk_del_record_t nfv9_logging_bulk_del_record; + if (PREDICT_FALSE(nfv9_logging_info->record[NAT44_BULK_DEL_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, NAT44_BULK_DEL_RECORD); + } + + /* + * We should definitely have add_record now, no need to sanitize + */ + + nfv9_logging_bulk_del_record.inside_vrf_id = + clib_host_to_net_u32(vrfmap->i_vrf_id); + + nfv9_logging_bulk_del_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + + nfv9_logging_bulk_del_record.outside_ip_port_start = + clib_host_to_net_u16(bulk_alloc_start_port); + + memcpy(nfv9_logging_info->record[NAT44_BULK_DEL_RECORD], + &nfv9_logging_bulk_del_record, CNAT_NFV9_BULK_DEL_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT44_BULK_DEL_RECORD] + += CNAT_NFV9_BULK_DEL_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_BULK_DEL_RECORD_LENGTH; + + nfv9_logging_info->record[NAT44_BULK_DEL_RECORD] + += CNAT_NFV9_BULK_DEL_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT44_BULK_DEL_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT44_BULK_DEL_RECORD]); + +} + +#endif /* #ifndef NO_BULK_LOGGING */ +/* + * edt: * * cnat_nfv9_log_mapping_create + * + * Tries to log a creation of mapping record + * + * Argument: cnat_main_db_entry_t *db + * Main DB entry being created + * + * Argument: cnat_vrfmap_t *vrfmap + * VRF Map for the Main DB entry being created + */ +void cnat_nfv9_log_mapping_create (cnat_main_db_entry_t *db, + cnat_vrfmap_t 
*vrfmap +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + vlib_main_t * vm = vlib_get_main(); + + if (PREDICT_FALSE(vrfmap->nfv9_logging_index == EMPTY)) { + + //vlib_cli_output(vm, "\n1. Log Mapping failed"); + /* + * No logging configured, silently return + */ + return; + } + + if (cnat_nfv9_logging_info_pool == NULL) { + vlib_cli_output(vm, "%s: info_pool pointer is NULL !!!!\n", __func__); + return; + } + nfv9_logging_info = + cnat_nfv9_logging_info_pool + vrfmap->nfv9_logging_index; + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + //vlib_cli_output(vm, "\n2. Log Mapping failed"); + return; + } + + } + +#ifndef NO_BULK_LOGGING + if(bulk_alloc > 0) { /* new bulk alloc - use bulk add template */ + cnat_nfv9_insert_bulk_add_record(nfv9_logging_info, db, vrfmap, + bulk_alloc); + } else if(bulk_alloc == CACHE_ALLOC_NO_LOG_REQUIRED) + return; /* No logging required.. bulk port usage */ + else /* Individual logging .. fall back to old method */ +#endif + cnat_nfv9_insert_add_record(nfv9_logging_info, db, vrfmap); + + nfv9_logging_info->total_record_count += 1; + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + +/* + * edt: * * cnat_nfv9_log_mapping_delete + * + * Tries to log a deletion of mapping record + * + * Argument: cnat_main_db_entry_t *db + * Main DB entry being deleted + * + * Argument: cnat_vrfmap_t *vrfmap + * VRF Map for the Main DB entry being deleted + */ +void cnat_nfv9_log_mapping_delete (cnat_main_db_entry_t * db, + cnat_vrfmap_t *vrfmap +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + + if (PREDICT_FALSE(vrfmap->nfv9_logging_index == EMPTY)) { + //vlib_cli_output(vm, "\n3. Log Mapping failed"); + /* + * No logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + vrfmap->nfv9_logging_index; + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + //vlib_cli_output(vm, "\n4. Log Mapping failed"); + return; + } + } +#ifndef NO_BULK_LOGGING + if(bulk_alloc > 0) { /* new bulk alloc - use bulk add template */ + cnat_nfv9_insert_bulk_del_record(nfv9_logging_info, db, vrfmap, + bulk_alloc); + } else if(bulk_alloc == CACHE_ALLOC_NO_LOG_REQUIRED) + return; /* No logging required.. bulk port usage */ + else /* Individual logging .. fall back to old method */ +#endif + cnat_nfv9_insert_del_record(nfv9_logging_info, db, vrfmap); + + nfv9_logging_info->total_record_count += 1; + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. 
+ */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + + +/* NAT64 Related routines */ + +/* + * edt: * * cnat_nfv9_bib_mapping_create + * + * Tries to log a creation of Bib mapping record + * + * Argument: nat64_bib_entry_t *db + * BIB DB entry being created + * + * Argument: nat64_table_entry_t *nat64_entry + * NAT64 Instance where this BIB belongs + */ +void cnat_nfv9_bib_mapping_create (nat64_bib_entry_t *db, + nat64_table_entry_t *nat64_entry) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + u16 my_proto_mask; + u8 my_protocol; + nfv9_nat64_add_bib_record_t nfv9_logging_add_record; + + if (PREDICT_FALSE(nat64_entry->logging_index == EMPTY)) { + /* + * No logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + nat64_entry->logging_index; + + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + return; + } + } + + if (PREDICT_FALSE(nfv9_logging_info->record[NAT64_ADD_BIB_RECORD] == NULL)){ + cnat_nfv9_record_create(nfv9_logging_info,NAT64_ADD_BIB_RECORD); + } + + + nfv9_logging_add_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(db->v6_in_key.ipv6[0]); + nfv9_logging_add_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(db->v6_in_key.ipv6[1]); + nfv9_logging_add_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(db->v6_in_key.ipv6[2]); + nfv9_logging_add_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(db->v6_in_key.ipv6[3]); + + + nfv9_logging_add_record.outside_v4_src_addr = + clib_host_to_net_u32(db->v4_out_key.k.ipv4); + + nfv9_logging_add_record.inside_src_port = + clib_host_to_net_u16(db->v6_in_key.port); + nfv9_logging_add_record.outside_src_port = + clib_host_to_net_u16(db->v4_out_key.k.port); + + my_proto_mask = db->v6_in_key.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? IPV6_PROTO_ICMPV6 : 0))); + nfv9_logging_add_record.protocol = my_protocol; + + + memcpy(nfv9_logging_info->record[NAT64_ADD_BIB_RECORD], + &nfv9_logging_add_record, CNAT_NFV9_NAT64_ADD_BIB_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT64_ADD_BIB_RECORD] += + CNAT_NFV9_NAT64_ADD_BIB_RECORD_LENGTH; + nfv9_logging_info->pkt_length += CNAT_NFV9_NAT64_ADD_BIB_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[NAT64_ADD_BIB_RECORD] + += CNAT_NFV9_NAT64_ADD_BIB_RECORD_LENGTH; + + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT64_ADD_BIB_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT64_ADD_BIB_RECORD]); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. 
+ */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + + +/* + * edt: * * cnat_nfv9_session_mapping_create + * + * Tries to log a creation of Bib mapping record + * + * Argument: nat64_bib_entry_t *bdb + * BIB DB entry for the session that is created + * + * Argument: nat64_session_entry_t *sdb + * Session DB entry being created + * + * Argument: nat64_table_entry_t *nat64_entry + * NAT64 Instance where this BIB and Session belongs + */ +void cnat_nfv9_session_mapping_create (nat64_bib_entry_t *bdb, + nat64_session_entry_t *sdb, + nat64_table_entry_t *nat64_entry_ptr) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + u16 my_proto_mask; + u8 my_protocol; + u32 dest_v6[4]; + nfv9_nat64_add_session_record_t nfv9_logging_add_record; + u8 *ipv6_addr_ptr; + u8 *ipv4_addr_ptr; + + + if (PREDICT_FALSE(nat64_entry_ptr->logging_index == EMPTY)) { + /* + * No logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + nat64_entry_ptr->logging_index; + + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)){ + return; + } + } + + if (PREDICT_FALSE(nfv9_logging_info->record[NAT64_ADD_SESSION_RECORD] + == NULL)){ + cnat_nfv9_record_create(nfv9_logging_info, NAT64_ADD_SESSION_RECORD); + } + + + nfv9_logging_add_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(bdb->v6_in_key.ipv6[0]); + nfv9_logging_add_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(bdb->v6_in_key.ipv6[1]); + nfv9_logging_add_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(bdb->v6_in_key.ipv6[2]); + nfv9_logging_add_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(bdb->v6_in_key.ipv6[3]); + + + nfv9_logging_add_record.outside_v4_src_addr = + clib_host_to_net_u32(bdb->v4_out_key.k.ipv4); + + + nfv9_logging_add_record.outside_v4_dest_addr = + clib_host_to_net_u32(sdb->v4_dest_key.k.ipv4); + + /* Need to create the V6 address using prefix */ + dest_v6[0] = nat64_entry_ptr->v6_prefix[0]; + dest_v6[1] = nat64_entry_ptr->v6_prefix[1]; + dest_v6[2] = nat64_entry_ptr->v6_prefix[2]; + dest_v6[3] = nat64_entry_ptr->v6_prefix[3]; + + ipv6_addr_ptr = (u8 *) (&(dest_v6[0])); + ipv4_addr_ptr = (u8 *) (&(sdb->v4_dest_key.k.ipv4)); + + *(ipv6_addr_ptr + nat64_entry_ptr->octet0_position) = *(ipv4_addr_ptr); + *(ipv6_addr_ptr + nat64_entry_ptr->octet1_position) = *(ipv4_addr_ptr + 1); + *(ipv6_addr_ptr + nat64_entry_ptr->octet2_position) = *(ipv4_addr_ptr + 2); + *(ipv6_addr_ptr + nat64_entry_ptr->octet3_position) = *(ipv4_addr_ptr + 3); + + nfv9_logging_add_record.inside_v6_dest_addr[0] = + clib_host_to_net_u32(dest_v6[0]); + nfv9_logging_add_record.inside_v6_dest_addr[1] = + clib_host_to_net_u32(dest_v6[1]); + nfv9_logging_add_record.inside_v6_dest_addr[2] = + clib_host_to_net_u32(dest_v6[2]); + nfv9_logging_add_record.inside_v6_dest_addr[3] = + clib_host_to_net_u32(dest_v6[3]); + + nfv9_logging_add_record.outside_v4_dest_addr = + clib_host_to_net_u32(sdb->v4_dest_key.k.ipv4); + + nfv9_logging_add_record.inside_src_port = + clib_host_to_net_u16(bdb->v6_in_key.port); + nfv9_logging_add_record.outside_src_port = + clib_host_to_net_u16(bdb->v4_out_key.k.port); + + nfv9_logging_add_record.dest_port = + 
clib_host_to_net_u16(sdb->v4_dest_key.k.port); + + + my_proto_mask = bdb->v6_in_key.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? IPV6_PROTO_ICMPV6 : 0))); + nfv9_logging_add_record.protocol = my_protocol; + + + memcpy(nfv9_logging_info->record[NAT64_ADD_SESSION_RECORD], + &nfv9_logging_add_record, CNAT_NFV9_NAT64_ADD_SESSION_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT64_ADD_SESSION_RECORD] += + CNAT_NFV9_NAT64_ADD_SESSION_RECORD_LENGTH; + nfv9_logging_info->pkt_length += CNAT_NFV9_NAT64_ADD_SESSION_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[NAT64_ADD_SESSION_RECORD] + += CNAT_NFV9_NAT64_ADD_SESSION_RECORD_LENGTH; + + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT64_ADD_SESSION_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT64_ADD_SESSION_RECORD]); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + + +/* + * edt: * * cnat_nfv9_bib_mapping_delete + * + * Tries to log a deletion of Bib mapping record + * + * Argument: nat64_bib_entry_t *db + * BIB DB entry being created + * + * Argument: nat64_table_entry_t *nat64_entry + * NAT64 Instance where this BIB belongs + */ +void cnat_nfv9_bib_mapping_delete (nat64_bib_entry_t *db, + nat64_table_entry_t *nat64_entry) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + u16 my_proto_mask; + u8 my_protocol; + nfv9_nat64_del_bib_record_t nfv9_logging_del_record; + if (PREDICT_FALSE(nat64_entry->logging_index == EMPTY)) { + /* + * No logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + nat64_entry->logging_index; + + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)){ + return; + } + } + + if (PREDICT_FALSE(nfv9_logging_info->record[NAT64_DEL_BIB_RECORD] == NULL)){ + cnat_nfv9_record_create(nfv9_logging_info,NAT64_DEL_BIB_RECORD); + } + + + nfv9_logging_del_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(db->v6_in_key.ipv6[0]); + nfv9_logging_del_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(db->v6_in_key.ipv6[1]); + nfv9_logging_del_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(db->v6_in_key.ipv6[2]); + nfv9_logging_del_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(db->v6_in_key.ipv6[3]); + + + nfv9_logging_del_record.inside_src_port = + clib_host_to_net_u16(db->v6_in_key.port); + + my_proto_mask = db->v6_in_key.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? 
IPV6_PROTO_ICMPV6 : 0))); + nfv9_logging_del_record.protocol = my_protocol; + + + memcpy(nfv9_logging_info->record[NAT64_DEL_BIB_RECORD], + &nfv9_logging_del_record, CNAT_NFV9_NAT64_DEL_BIB_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT64_DEL_BIB_RECORD] += + CNAT_NFV9_NAT64_DEL_BIB_RECORD_LENGTH; + nfv9_logging_info->pkt_length += CNAT_NFV9_NAT64_DEL_BIB_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[NAT64_DEL_BIB_RECORD] + += CNAT_NFV9_NAT64_DEL_BIB_RECORD_LENGTH; + + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT64_DEL_BIB_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT64_DEL_BIB_RECORD]); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + + +/* + * edt: * * cnat_nfv9_session_mapping_delete + * + * Tries to log a deletion of Bib mapping record + * + * Argument: nat64_bib_entry_t *bdb + * BIB DB entry for the session that is created + * + * Argument: nat64_session_entry_t *sdb + * Session DB entry being created + * + * Argument: nat64_table_entry_t *nat64_entry + * NAT64 Instance where this BIB and Session belongs + */ +void cnat_nfv9_session_mapping_delete (nat64_bib_entry_t *bdb, + nat64_session_entry_t *sdb, + nat64_table_entry_t *nat64_entry_ptr) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + u16 my_proto_mask; + u8 my_protocol; + u32 dest_v6[4]; + nfv9_nat64_del_session_record_t nfv9_logging_del_record; + u8 *ipv6_addr_ptr; + u8 *ipv4_addr_ptr; + + if (PREDICT_FALSE(nat64_entry_ptr->logging_index == EMPTY)) { + /* + * No logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + nat64_entry_ptr->logging_index; + + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)){ + return; + } + } + + if (PREDICT_FALSE(nfv9_logging_info->record[NAT64_DEL_SESSION_RECORD] + == NULL)){ + cnat_nfv9_record_create(nfv9_logging_info, NAT64_DEL_SESSION_RECORD); + } + + + nfv9_logging_del_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(bdb->v6_in_key.ipv6[0]); + nfv9_logging_del_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(bdb->v6_in_key.ipv6[1]); + nfv9_logging_del_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(bdb->v6_in_key.ipv6[2]); + nfv9_logging_del_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(bdb->v6_in_key.ipv6[3]); + + /* Need to create the V6 address using prefix */ + dest_v6[0] = nat64_entry_ptr->v6_prefix[0]; + dest_v6[1] = nat64_entry_ptr->v6_prefix[1]; + dest_v6[2] = nat64_entry_ptr->v6_prefix[2]; + dest_v6[3] = nat64_entry_ptr->v6_prefix[3]; + + ipv6_addr_ptr = (u8 *) (&(dest_v6[0])); + ipv4_addr_ptr = (u8 *) (&(sdb->v4_dest_key.k.ipv4)); + + *(ipv6_addr_ptr + nat64_entry_ptr->octet0_position) = *(ipv4_addr_ptr); + *(ipv6_addr_ptr + nat64_entry_ptr->octet1_position) = *(ipv4_addr_ptr + 1); + *(ipv6_addr_ptr + nat64_entry_ptr->octet2_position) = *(ipv4_addr_ptr + 2); + 
*(ipv6_addr_ptr + nat64_entry_ptr->octet3_position) = *(ipv4_addr_ptr + 3); + + nfv9_logging_del_record.inside_v6_dest_addr[0] = + clib_host_to_net_u32(dest_v6[0]); + nfv9_logging_del_record.inside_v6_dest_addr[1] = + clib_host_to_net_u32(dest_v6[1]); + nfv9_logging_del_record.inside_v6_dest_addr[2] = + clib_host_to_net_u32(dest_v6[2]); + nfv9_logging_del_record.inside_v6_dest_addr[3] = + clib_host_to_net_u32(dest_v6[3]); + + nfv9_logging_del_record.inside_src_port = + clib_host_to_net_u16(bdb->v6_in_key.port); + + nfv9_logging_del_record.dest_port = + clib_host_to_net_u16(sdb->v4_dest_key.k.port); + + + my_proto_mask = bdb->v6_in_key.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? IPV6_PROTO_ICMPV6 : 0))); + nfv9_logging_del_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[NAT64_DEL_SESSION_RECORD], + &nfv9_logging_del_record, CNAT_NFV9_NAT64_DEL_SESSION_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT64_DEL_SESSION_RECORD] += + CNAT_NFV9_NAT64_DEL_SESSION_RECORD_LENGTH; + nfv9_logging_info->pkt_length += CNAT_NFV9_NAT64_DEL_SESSION_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[NAT64_DEL_SESSION_RECORD] + += CNAT_NFV9_NAT64_DEL_SESSION_RECORD_LENGTH; + + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT64_DEL_SESSION_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT64_DEL_SESSION_RECORD]); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + +/* + * edt: * * cnat_nfv9_nat44_log_session_create + * + * Tries to log a creation of mapping record (session based) + * + * Argument: cnat_main_db_entry_t *db + * Main DB entry being created + * Arugment: cnat_session_entry_t *sdb + * Session DB entry if the destination is not the first dest + * Argument: cnat_vrfmap_t *vrfmap + * VRF Map for the Main DB entry being created + */ + +void cnat_nfv9_nat44_log_session_create(cnat_main_db_entry_t *db, + cnat_session_entry_t *sdb, + cnat_vrfmap_t *vrfmap) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + u16 my_proto_mask; + u8 my_protocol; + nfv9_add_session_record_t nfv9_logging_add_session_record; + + if (PREDICT_FALSE(vrfmap->nfv9_logging_index == EMPTY)) { + //vlib_cli_output(vm,"\n1. Log Mapping failed"); + /* + * No logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + vrfmap->nfv9_logging_index; + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + //vlib_cli_output(vm,"\n2. 
Log Mapping failed"); + return; + } + } + + if(PREDICT_FALSE(nfv9_logging_info->record[ + NAT44_ADD_SESSION_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, NAT44_ADD_SESSION_RECORD); + } + + /* + * We should definitely have add_record now, no need to sanitize + */ + nfv9_logging_add_session_record.inside_vrf_id = + clib_host_to_net_u32(vrfmap->i_vrf_id); + nfv9_logging_add_session_record.outside_vrf_id = + clib_host_to_net_u32(vrfmap->o_vrf_id); + + nfv9_logging_add_session_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + nfv9_logging_add_session_record.outside_ip_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + + /* If sdb is null, it is assumed that logging is being done + * for the first destination which is held in the main db + + * itself + */ + if(PREDICT_TRUE(sdb == NULL)) { + nfv9_logging_add_session_record.dest_ip_addr = + clib_host_to_net_u32(db->dst_ipv4); + nfv9_logging_add_session_record.dest_port = + clib_host_to_net_u16(db->dst_port); + } else { + nfv9_logging_add_session_record.dest_ip_addr = + clib_host_to_net_u32(sdb->v4_dest_key.k.ipv4); + nfv9_logging_add_session_record.dest_port = + clib_host_to_net_u16(sdb->v4_dest_key.k.port); + } + + nfv9_logging_add_session_record.inside_ip_port = + clib_host_to_net_u16(db->in2out_key.k.port); + nfv9_logging_add_session_record.outside_ip_port = + clib_host_to_net_u16(db->out2in_key.k.port); + + + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? ICMP_PROT : GRE_PROT))); + nfv9_logging_add_session_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[NAT44_ADD_SESSION_RECORD], + &nfv9_logging_add_session_record, + CNAT_NFV9_NAT44_ADD_SESSION_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT44_ADD_SESSION_RECORD] + += CNAT_NFV9_NAT44_ADD_SESSION_RECORD_LENGTH; + nfv9_logging_info->pkt_length += CNAT_NFV9_NAT44_ADD_SESSION_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + + nfv9_logging_info->record[NAT44_ADD_SESSION_RECORD] + += CNAT_NFV9_NAT44_ADD_SESSION_RECORD_LENGTH; + + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT44_ADD_SESSION_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT44_ADD_SESSION_RECORD]); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. 
+ */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + +/* + * edt: * * cnat_nfv9_nat44_log_session_delete + * + * Tries to log a deletion of mapping record (session based) + * + * Argument: cnat_main_db_entry_t *db + * Main DB entry being created + * Arugment: cnat_session_entry_t *sdb + * Session DB entry if the destination is not the first dest + * Argument: cnat_vrfmap_t *vrfmap + * VRF Map for the Main DB entry being deleted + */ + +void cnat_nfv9_nat44_log_session_delete(cnat_main_db_entry_t *db, + cnat_session_entry_t *sdb, + cnat_vrfmap_t *vrfmap) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + u16 my_proto_mask; + u8 my_protocol; + nfv9_del_session_record_t nfv9_logging_del_session_record; + + if (PREDICT_FALSE(vrfmap->nfv9_logging_index == EMPTY)) { + //vlib_cli_output(vm, "\n1. Log Mapping failed"); + /* + * No logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + vrfmap->nfv9_logging_index; + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + //vlib_cli_output(vm, "\n2. Log Mapping failed"); + return; + } + } + + if(PREDICT_FALSE(nfv9_logging_info->record[ + NAT44_DEL_SESSION_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, NAT44_DEL_SESSION_RECORD); + } + + /* + * We should definitely have add_record now, no need to sanitize + */ + nfv9_logging_del_session_record.inside_vrf_id = + clib_host_to_net_u32(vrfmap->i_vrf_id); + + nfv9_logging_del_session_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + + /* If sdb is null, it is assumed that logging is being done + * for the first destination which is held in the main db + * itself + */ + if(PREDICT_TRUE(sdb == NULL)) { + nfv9_logging_del_session_record.dest_ip_addr = + clib_host_to_net_u32(db->dst_ipv4); + nfv9_logging_del_session_record.dest_port = + clib_host_to_net_u16(db->dst_port); + } else { + nfv9_logging_del_session_record.dest_ip_addr = + clib_host_to_net_u32(sdb->v4_dest_key.k.ipv4); + nfv9_logging_del_session_record.dest_port = + clib_host_to_net_u16(sdb->v4_dest_key.k.port); + } + + nfv9_logging_del_session_record.inside_ip_port = + clib_host_to_net_u16(db->in2out_key.k.port); + + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? 
ICMP_PROT : GRE_PROT))); + + nfv9_logging_del_session_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[NAT44_DEL_SESSION_RECORD], + &nfv9_logging_del_session_record, + CNAT_NFV9_NAT44_DEL_SESSION_RECORD_LENGTH); + + nfv9_logging_info->record_length[NAT44_DEL_SESSION_RECORD] + += CNAT_NFV9_NAT44_DEL_SESSION_RECORD_LENGTH; + nfv9_logging_info->pkt_length += CNAT_NFV9_NAT44_DEL_SESSION_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[NAT44_DEL_SESSION_RECORD] + += CNAT_NFV9_NAT44_DEL_SESSION_RECORD_LENGTH; + + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[NAT44_DEL_SESSION_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[NAT44_DEL_SESSION_RECORD]); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + +/* + * DS-Lite APIs for netflow logging + */ + +/* + * edt: * * cnat_nfv9_ds_lite_mapping_create + * + * Tries to log a creation of mapping record + * + * Argument: cnat_main_db_entry_t *db + * Main DB entry being created + * + * Argument: dslite_table_entry_t *dslite_entry + * ds-lite instance for the Main DB entry being created + */ +void cnat_nfv9_ds_lite_mapping_create(cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + + cnat_nfv9_logging_info_t *nfv9_logging_info = NULL; + + if (PREDICT_FALSE(!(db && dslite_entry))) { + return; + } + if (PREDICT_FALSE(dslite_entry->nfv9_logging_index == EMPTY)) { + /* + * no logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + dslite_entry->nfv9_logging_index; + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + return; + } + } +#ifndef NO_BULK_LOGGING + if(bulk_alloc > 0) { /* new bulk alloc - use bulk add template */ + cnat_nfv9_ds_lite_insert_bulk_add_record(nfv9_logging_info, + db, dslite_entry, bulk_alloc); + } else if(bulk_alloc == CACHE_ALLOC_NO_LOG_REQUIRED) + return; /* No logging required.. bulk port usage */ + else /* Individual logging .. fall back to old method */ +#endif /*NO_BULK_LOGGING*/ + cnat_nfv9_ds_lite_insert_add_record(nfv9_logging_info, db, dslite_entry); + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. 
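+ * Note: this flush covers both the bulk path (bulk_alloc > 0) and the
+ * individual record path; the CACHE_ALLOC_NO_LOG_REQUIRED case returns
+ * before reaching it.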
+ */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + +/* + * edt: * * cnat_nfv9_ds_lite_mapping_delete + * + * Tries to log a deletion of mapping record + * + * Argument: cnat_main_db_entry_t *db + * Main DB entry being deleted + * + * Argument: dslite_table_entry_t *dslite_entry + * ds-lite instance for the Main DB entry being deleted + */ +void cnat_nfv9_ds_lite_mapping_delete(cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + + cnat_nfv9_logging_info_t *nfv9_logging_info = NULL; + if (PREDICT_FALSE(!(db && dslite_entry))) { + return; + } + if (PREDICT_FALSE(dslite_entry->nfv9_logging_index == EMPTY)) { + /* + * No logging configured, silently return + */ + return; + } + nfv9_logging_info = + cnat_nfv9_logging_info_pool + dslite_entry->nfv9_logging_index; + + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + return; + } + } +#ifndef NO_BULK_LOGGING + if(bulk_alloc > 0) { /* new bulk alloc - use bulk add template */ + cnat_nfv9_ds_lite_insert_bulk_del_record(nfv9_logging_info, + db, dslite_entry, bulk_alloc); + } else if(bulk_alloc == CACHE_ALLOC_NO_LOG_REQUIRED) + return; /* No logging required.. bulk port usage */ + else /* Individual logging .. fall back to old method */ +#endif /*NO_BULK_LOGGING*/ + cnat_nfv9_ds_lite_insert_del_record(nfv9_logging_info, db, dslite_entry); + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. 
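+ * Note: the delete path mirrors the create path above, differing only in
+ * the record type used (bulk delete / individual delete instead of add).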
+ */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } +} + +/* + * edt: * * cnat_nfv9_dslite_log_session_create + * + * Tries to log a creation of mapping record (session based) + * Argument: cnat_main_db_entry_t *db + * Main DB entry being created + * Arugment: cnat_session_entry_t *sdb + * Session DB entry if the destination is not the first dest + * Argument: dslite_table_entry_t *dslite_entry, + * dslite table entry for dslite instance + */ + +void cnat_nfv9_ds_lite_log_session_create( + cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, + cnat_session_entry_t *sdb) +{ + + nfv9_ds_lite_add_session_record_t nfv9_logging_add_record ; + cnat_user_db_entry_t *udb = NULL; + u16 my_proto_mask; + u8 my_protocol; + cnat_nfv9_logging_info_t *nfv9_logging_info = 0; + + if (PREDICT_FALSE(dslite_entry->nfv9_logging_index == EMPTY)) { + /* + * no logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + dslite_entry->nfv9_logging_index; + udb = cnat_user_db + db->user_index; + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + return; + } + } + + udb = cnat_user_db + db->user_index; + if (PREDICT_FALSE(!udb)) { + return; + } + if (PREDICT_FALSE(nfv9_logging_info->record[DS_LITE_ADD_SESSION_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, DS_LITE_ADD_SESSION_RECORD); + } + /* + * We should definitely have add_record now, no need to sanitize + */ + nfv9_logging_add_record.inside_vrf_id = + clib_host_to_net_u32(dslite_entry->i_vrf_id); + nfv9_logging_add_record.outside_vrf_id = + clib_host_to_net_u32(dslite_entry->o_vrf_id); + + nfv9_logging_add_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + + nfv9_logging_add_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(udb->ipv6[0]); + nfv9_logging_add_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(udb->ipv6[1]); + nfv9_logging_add_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(udb->ipv6[2]); + nfv9_logging_add_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(udb->ipv6[3]); + + nfv9_logging_add_record.outside_ip_addr = + clib_host_to_net_u32(db->out2in_key.k.ipv4); + + nfv9_logging_add_record.inside_ip_port = + clib_host_to_net_u16(db->in2out_key.k.port); + nfv9_logging_add_record.outside_ip_port = + clib_host_to_net_u16(db->out2in_key.k.port); + + /* If sdb is null, it is assumed that logging is being done + * for the first destination which is held in the main db + + * itself + */ + if(PREDICT_TRUE(sdb == NULL)) { + nfv9_logging_add_record.dest_ip_addr = + clib_host_to_net_u32(db->dst_ipv4); + nfv9_logging_add_record.dest_port = + clib_host_to_net_u16(db->dst_port); + } else { + nfv9_logging_add_record.dest_ip_addr = + clib_host_to_net_u32(sdb->v4_dest_key.k.ipv4); + nfv9_logging_add_record.dest_port = + clib_host_to_net_u16(sdb->v4_dest_key.k.port); + } + + + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? 
ICMP_PROT : 0))); + nfv9_logging_add_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[DS_LITE_ADD_SESSION_RECORD], + &nfv9_logging_add_record, CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_LENGTH); + + nfv9_logging_info->record_length[DS_LITE_ADD_SESSION_RECORD] + += CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[DS_LITE_ADD_SESSION_RECORD] + += CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[DS_LITE_ADD_SESSION_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[DS_LITE_ADD_SESSION_RECORD]); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } + +} + +/* + * edt: * * cnat_nfv9_dslite_log_session_delete + * + * Tries to log a creation of mapping record (session based) + * Argument: cnat_main_db_entry_t *db + * Main DB entry being created + * Arugment: cnat_session_entry_t *sdb + * Session DB entry if the destination is not the first dest + * Argument: dslite_table_entry_t *dslite_entry, + * dslite table entry for dslite instance + */ + +void cnat_nfv9_ds_lite_log_session_delete( + cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, + cnat_session_entry_t *sdb) +{ + + nfv9_ds_lite_del_session_record_t nfv9_logging_add_record = {0}; + cnat_user_db_entry_t *udb = NULL; + u16 my_proto_mask; + u8 my_protocol; + cnat_nfv9_logging_info_t *nfv9_logging_info = NULL; + + if (PREDICT_FALSE(dslite_entry->nfv9_logging_index == EMPTY)) { + /* + * no logging configured, silently return + */ + return; + } + + nfv9_logging_info = + cnat_nfv9_logging_info_pool + dslite_entry->nfv9_logging_index; + udb = cnat_user_db + db->user_index; + + if (PREDICT_FALSE(!udb)) { + return; + } + + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(nfv9_logging_info->current_logging_context == NULL)) { + return; + } + } + + if (PREDICT_FALSE(nfv9_logging_info->record[DS_LITE_DEL_SESSION_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, DS_LITE_DEL_SESSION_RECORD); + } + /* + * We should definitely have add_record now, no need to sanitize + */ + nfv9_logging_add_record.inside_vrf_id = + clib_host_to_net_u32(dslite_entry->i_vrf_id); + + nfv9_logging_add_record.inside_ip_addr = + clib_host_to_net_u32(db->in2out_key.k.ipv4); + + nfv9_logging_add_record.inside_v6_src_addr[0] = + clib_host_to_net_u32(udb->ipv6[0]); + nfv9_logging_add_record.inside_v6_src_addr[1] = + clib_host_to_net_u32(udb->ipv6[1]); + nfv9_logging_add_record.inside_v6_src_addr[2] = + clib_host_to_net_u32(udb->ipv6[2]); + nfv9_logging_add_record.inside_v6_src_addr[3] = + clib_host_to_net_u32(udb->ipv6[3]); + + nfv9_logging_add_record.inside_ip_port = + clib_host_to_net_u16(db->in2out_key.k.port); + + /* If sdb is null, it is assumed that logging is being done + * for the first destination which is held in the main db + * itself + */ 
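+
+    /*
+     * Fill in the remaining fields: the destination address/port (taken
+     * from the main DB for the first destination, from sdb otherwise) and
+     * the protocol, which is decoded from the CNAT_PRO_MASK bits of
+     * in2out_key.k.vrf.
+     */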
+ if(PREDICT_TRUE(sdb == NULL)) { + nfv9_logging_add_record.dest_ip_addr = + clib_host_to_net_u32(db->dst_ipv4); + nfv9_logging_add_record.dest_port = + clib_host_to_net_u16(db->dst_port); + } else { + nfv9_logging_add_record.dest_ip_addr = + clib_host_to_net_u32(sdb->v4_dest_key.k.ipv4); + nfv9_logging_add_record.dest_port = + clib_host_to_net_u16(sdb->v4_dest_key.k.port); + } + + + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + + my_protocol = ((my_proto_mask == CNAT_UDP) ? UDP_PROT : + ((my_proto_mask == CNAT_TCP) ? TCP_PROT : + ((my_proto_mask == CNAT_ICMP) ? ICMP_PROT : 0))); + nfv9_logging_add_record.protocol = my_protocol; + + memcpy(nfv9_logging_info->record[DS_LITE_DEL_SESSION_RECORD], + &nfv9_logging_add_record, CNAT_NFV9_DS_LITE_DEL_SESSION_RECORD_LENGTH); + + nfv9_logging_info->record_length[DS_LITE_DEL_SESSION_RECORD] + += CNAT_NFV9_DS_LITE_DEL_SESSION_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += CNAT_NFV9_DS_LITE_DEL_SESSION_RECORD_LENGTH; + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[DS_LITE_DEL_SESSION_RECORD] + += CNAT_NFV9_DS_LITE_DEL_SESSION_RECORD_LENGTH; + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[DS_LITE_DEL_SESSION_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[DS_LITE_DEL_SESSION_RECORD]); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } + +} + + +/* + * netflow logging API for ingress vrf_id to name mapping + */ + +/* + * edt: * * handle_vrfid_name_mapping + * It will search for valid natflow entry in netflow pool, + * once found one, will send all vrfid name mapping info + * using that entry + */ + + +inline +void handle_vrfid_name_mapping(void) +{ + cnat_nfv9_logging_info_t *nfv9_logging_info = NULL; + + pool_foreach (nfv9_logging_info, cnat_nfv9_logging_info_pool, ({ + if(PREDICT_FALSE(nfv9_logging_info == NULL)) { + continue; + } + nfv9_server_info_t *server = nfv9_server_info_pool + + nfv9_logging_info->server_index; + if(server->template_sent == TEMPLATE_SENT_TRUE) { + cnat_nfv9_ingress_vrfid_name_mapping_create(nfv9_logging_info); + server->template_sent = TEMPLATE_SENT_FALSE; + } + })); +} + +/* + * edt: * * cnat_nfv9_ingress_vrfid_name_mapping_create + * + * Tries to log vrfid-name mapping record + * Argument: netflow pointer + */ + + +void cnat_nfv9_ingress_vrfid_name_mapping_create( + cnat_nfv9_logging_info_t *nfv9_logging_info) +{ + u16 index = 0; + + for (index = 0; index < MAX_VRFID; index++) { + if(vrfid_name_map[index].ref_count == 0) { + continue; + } + if (PREDICT_FALSE( + nfv9_logging_info->current_logging_context == NULL)) { + cnat_nfv9_create_logging_context(nfv9_logging_info, + cnat_nfv9_template_add_default); + } + cnat_nfv9_insert_ingress_vrfid_name_record( + nfv9_logging_info,index); + if (PREDICT_FALSE(nfv9_logging_info->pkt_length > + nfv9_logging_info->max_length_minus_max_record_size) || + PREDICT_FALSE(index == MAX_VRFID - 1)) { + if (PREDICT_TRUE(nfv9_logging_info->current_logging_context + != NULL)) { + cnat_nfv9_send_pkt(nfv9_logging_info); + } + } + }/*for()*/ + return; +} + +static void cnat_nfv9_insert_ingress_vrfid_name_record( + cnat_nfv9_logging_info_t 
*nfv9_logging_info, u16 index) +{ + nfv9_ingress_vrfid_name_record_t nfv9_ingress_vrfid_name_record = {0}; + + if (PREDICT_FALSE( + nfv9_logging_info->record[INGRESS_VRF_ID_NAME_RECORD] == NULL)) { + cnat_nfv9_record_create(nfv9_logging_info, INGRESS_VRF_ID_NAME_RECORD); + } + nfv9_ingress_vrfid_name_record.ingress_vrf_id = + clib_host_to_net_u32(vrfid_name_map[index].vrf_id); + + memcpy(nfv9_ingress_vrfid_name_record.ingress_vrf_name, + vrfid_name_map[index].vrf_name, NFV9_VRF_NAME_LEN); + + memcpy(nfv9_logging_info->record[INGRESS_VRF_ID_NAME_RECORD], + &nfv9_ingress_vrfid_name_record, + CNAT_NFV9_INGRESS_VRFID_NAME_RECORD_LENGTH); + + nfv9_logging_info->record_length[INGRESS_VRF_ID_NAME_RECORD] + += CNAT_NFV9_INGRESS_VRFID_NAME_RECORD_LENGTH; + + nfv9_logging_info->pkt_length += + CNAT_NFV9_INGRESS_VRFID_NAME_RECORD_LENGTH; + + nfv9_logging_info->total_record_count += 1; + + nfv9_logging_info->record[INGRESS_VRF_ID_NAME_RECORD] + += CNAT_NFV9_INGRESS_VRFID_NAME_RECORD_LENGTH; + + nfv9_logging_info->next_data_ptr = + nfv9_logging_info->record[INGRESS_VRF_ID_NAME_RECORD]; + + nfv9_logging_info->dataflow_header->dataflow_length = + clib_host_to_net_u32( + nfv9_logging_info->record_length[INGRESS_VRF_ID_NAME_RECORD]); + return; +} +/* + * edt: * * cnat_log_timer_handler + * + * Timer handler for sending any pending NFV9 record + * + * Argument: spp_timer_t * timer_p + * Timer handler structure + */ +void handle_pending_nfv9_pkts() +{ + vlib_node_t *output_node; + vlib_main_t * vm = vlib_get_main(); + cnat_nfv9_logging_info_t *my_nfv9_logging_info = 0; + u32 current_timestamp = cnat_nfv9_get_sys_up_time_in_ms(); + u32 current_unix_time_in_seconds = cnat_nfv9_get_unix_time_in_seconds(); + + output_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup"); + + pool_foreach (my_nfv9_logging_info, cnat_nfv9_logging_info_pool, ({ + nfv9_server_info_t *server = nfv9_server_info_pool + + my_nfv9_logging_info->server_index; + if (my_nfv9_logging_info->queued_logging_context || + (my_nfv9_logging_info->current_logging_context && + (current_timestamp - + my_nfv9_logging_info->current_logging_context_timestamp) + > 1000)) { + /* + * If there is a current logging context and timestamp + * indicates it is pending for long, send it out + * Also if there is a queued context send it out as well + */ + vlib_cli_output(vm, "\nNFV9_TIMER: queued %p, curr %p", + my_nfv9_logging_info->queued_logging_context, + my_nfv9_logging_info->current_logging_context); + + + cnat_nfv9_send_pkt_always_success(my_nfv9_logging_info, + output_node); + } else { + /* + * If the last_template_sent_time is too far back in time + * send the template even if there is no NFv9 records to send + */ + if ((my_nfv9_logging_info->queued_logging_context == NULL) && + (my_nfv9_logging_info->current_logging_context == NULL) && + ((current_unix_time_in_seconds - + server->last_template_sent_time) > + server->timeout_rate)) { + cnat_nfv9_create_logging_context(my_nfv9_logging_info, + cnat_nfv9_template_add_always); + if (PREDICT_TRUE(my_nfv9_logging_info->current_logging_context + != NULL)) { + cnat_nfv9_send_pkt(my_nfv9_logging_info); + } + } + } + })); +} + +/* + * Code to initialize NFV9 Template. This is done when a NFV9 is enabled + * It is done only once and later used when sending NFV9 template records. 
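+ * All field type/size values below are written with clib_host_to_net_u16 so
+ * the template is already in network byte order; the NAT64, DS-Lite and bulk
+ * template sections are currently compiled out under "#if 0".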
+ */ +static void +cnat_nfv9_template_init (void) +{ + cnat_nfv9_template_info.flowset_id = + clib_host_to_net_u16(CNAT_NFV9_TEMPLATE_FLOWSET_ID); + cnat_nfv9_template_info.length = + clib_host_to_net_u16(CNAT_NFV9_TEMPLATE_LENGTH - + CNAT_NFV9_OPTION_TEMPLATE_LENGTH); + /* + * Create the add Template + */ + cnat_nfv9_template_info.add_template_id = + clib_host_to_net_u16(CNAT_NFV9_ADD_TEMPLATE_ID); + cnat_nfv9_template_info.add_field_count = + clib_host_to_net_u16(CNAT_NFV9_ADD_FIELD_COUNT); + + cnat_nfv9_template_info.add_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.add_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.add_outside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.add_outside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.add_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_outside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_outside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.add_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.add_outside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.add_outside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.add_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.add_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + /* + * Create the delete Template + */ + cnat_nfv9_template_info.del_template_id = + clib_host_to_net_u16(CNAT_NFV9_DEL_TEMPLATE_ID); + cnat_nfv9_template_info.del_field_count = + clib_host_to_net_u16(CNAT_NFV9_DEL_FIELD_COUNT); + + cnat_nfv9_template_info.del_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.del_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.del_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.del_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.del_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.del_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.del_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.del_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + + /* Create NAT64 BIB Add template */ +#if 0 + cnat_nfv9_template_info.nat64_add_bib_template_id = + clib_host_to_net_u16(CNAT_NFV9_NAT64_ADD_BIB_TEMPLATE_ID); + cnat_nfv9_template_info.nat64_add_bib_field_count = + 
clib_host_to_net_u16(CNAT_NFV9_NAT64_ADD_BIB_FIELD_COUNT); + + + cnat_nfv9_template_info.nat64_add_bib_inside_ipv6_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_bib_inside_ipv6_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_add_bib_outside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_bib_outside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_add_bib_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_bib_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_add_bib_outside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_bib_outside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_add_bib_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_bib_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + + /* NAT64 BIB Delete */ + cnat_nfv9_template_info.nat64_del_bib_template_id = + clib_host_to_net_u16(CNAT_NFV9_NAT64_DEL_BIB_TEMPLATE_ID); + cnat_nfv9_template_info.nat64_del_bib_field_count = + clib_host_to_net_u16(CNAT_NFV9_NAT64_DEL_BIB_FIELD_COUNT); + + cnat_nfv9_template_info.nat64_del_bib_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_del_bib_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_del_bib_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat64_del_bib_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_del_bib_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.nat64_del_bib_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + + /* NAt64 SESSION ADD */ + + cnat_nfv9_template_info.nat64_add_session_template_id = + clib_host_to_net_u16(CNAT_NFV9_NAT64_ADD_SESSION_TEMPLATE_ID); + cnat_nfv9_template_info.nat64_add_session_field_count = + clib_host_to_net_u16(CNAT_NFV9_NAT64_ADD_SESSION_FIELD_COUNT); + + + cnat_nfv9_template_info.nat64_add_session_inside_ipv6_src_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_session_inside_ipv6_src_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_add_session_outside_ip_src_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_session_outside_ip_src_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE); + + + cnat_nfv9_template_info.nat64_add_session_inside_ipv6_dst_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_DST_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_session_inside_ipv6_dst_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_DST_ADDR_FIELD_SIZE); + + + 
cnat_nfv9_template_info.nat64_add_session_outside_ip_dst_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_DST_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_session_outside_ip_dst_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_DST_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_add_session_inside_ip_src_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_session_inside_ip_src_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + + cnat_nfv9_template_info.nat64_add_session_outside_ip_src_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_session_outside_ip_src_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_SIZE); + + + cnat_nfv9_template_info.nat64_add_session_ip_dest_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_session_ip_dest_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_add_session_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.nat64_add_session_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + + + /* Session Delete */ + cnat_nfv9_template_info.nat64_del_session_template_id = + clib_host_to_net_u16(CNAT_NFV9_NAT64_DEL_SESSION_TEMPLATE_ID); + cnat_nfv9_template_info.nat64_del_session_field_count = + clib_host_to_net_u16(CNAT_NFV9_NAT64_DEL_SESSION_FIELD_COUNT); + + cnat_nfv9_template_info.nat64_del_session_inside_ip_src_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_del_session_inside_ip_src_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_del_session_inside_ip_dst_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_DST_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat64_del_session_inside_ip_dst_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_DST_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_del_session_inside_ip_src_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat64_del_session_inside_ip_src_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_del_session_inside_ip_dst_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat64_del_session_inside_ip_dst_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat64_del_session_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.nat64_del_session_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); +#endif + /* + * Create the nat44 session add Template + */ + cnat_nfv9_template_info.nat44_session_add_template_id = + clib_host_to_net_u16(CNAT_NFV9_NAT44_ADD_SESSION_TEMPLATE_ID); + cnat_nfv9_template_info.nat44_session_add_field_count = + clib_host_to_net_u16(CNAT_NFV9_NAT44_ADD_SESSION_FIELD_COUNT); + + cnat_nfv9_template_info.nat44_session_add_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_inside_vrf_id_field_size = + 
clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_add_outside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_outside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_add_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_add_outside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_outside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_add_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_add_outside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_outside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_add_dest_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_dest_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_add_dest_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_dest_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_add_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_add_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + /* + * Create the nat44 session del Template + */ + cnat_nfv9_template_info.nat44_session_del_template_id = + clib_host_to_net_u16(CNAT_NFV9_NAT44_DEL_SESSION_TEMPLATE_ID); + cnat_nfv9_template_info.nat44_session_del_field_count = + clib_host_to_net_u16(CNAT_NFV9_NAT44_DEL_SESSION_FIELD_COUNT); + + cnat_nfv9_template_info.nat44_session_del_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_del_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_del_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_del_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_del_dest_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_del_dest_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_del_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_del_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + 
+ cnat_nfv9_template_info.nat44_session_del_dest_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_del_dest_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.nat44_session_del_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.nat44_session_del_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + /* + * Ds-lite add template + */ +#if 0 + cnat_nfv9_template_info.add_dslite_template_id = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_ADD_TEMPLATE_ID); + cnat_nfv9_template_info.add_dslite_field_count = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_ADD_FIELD_COUNT); + + cnat_nfv9_template_info.add_dslite_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_outside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_outside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_inside_ipv6_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_inside_ipv6_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_outside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_outside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_outside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_outside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + /* + * Ds-lite delete template + */ + cnat_nfv9_template_info.del_dslite_template_id = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_DEL_TEMPLATE_ID); + cnat_nfv9_template_info.del_dslite_field_count = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_DEL_FIELD_COUNT); + + cnat_nfv9_template_info.del_dslite_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + 
cnat_nfv9_template_info.del_dslite_inside_ipv6_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_inside_ipv6_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + /* + * Ds-lite session add template + */ + + cnat_nfv9_template_info.add_dslite_session_template_id = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_ADD_SESSION_TEMPLATE_ID); + cnat_nfv9_template_info.add_dslite_session_field_count = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_ADD_SESSION_FIELD_COUNT); + + cnat_nfv9_template_info.add_dslite_session_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_session_outside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_outside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_session_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_session_inside_ipv6_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_inside_ipv6_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_session_outside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_outside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_session_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_session_outside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_outside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_session_dest_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_dest_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.add_dslite_session_dest_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_dest_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_SIZE); + + 
cnat_nfv9_template_info.add_dslite_session_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.add_dslite_session_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + /* + * Ds-lite session delete template + */ + cnat_nfv9_template_info.del_dslite_session_template_id = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_DEL_SESSION_TEMPLATE_ID); + cnat_nfv9_template_info.del_dslite_session_field_count = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_DEL_SESSION_FIELD_COUNT); + + cnat_nfv9_template_info.del_dslite_session_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_session_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_session_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_session_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_session_inside_ipv6_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_session_inside_ipv6_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_session_inside_ip_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_session_inside_ip_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_session_dest_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_session_dest_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_session_dest_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_session_dest_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_DST_PORT_FIELD_SIZE); + + cnat_nfv9_template_info.del_dslite_session_protocol_field_type = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_TYPE); + cnat_nfv9_template_info.del_dslite_session_protocol_field_size = + clib_host_to_net_u16(CNAT_NFV9_PROTOCOL_FIELD_SIZE); + + /* Create add bulk template */ + cnat_nfv9_template_info.bulk_add_template_id = + clib_host_to_net_u16(CNAT_NFV9_NAT44_BULK_ADD_TEMPLATE_ID); + cnat_nfv9_template_info.bulk_add_field_count = + clib_host_to_net_u16(CNAT_NFV9_NAT44_BULK_ADD_FIELD_COUNT); + + cnat_nfv9_template_info.bulk_add_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.bulk_add_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_add_outside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.bulk_add_outside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_add_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.bulk_add_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_add_outside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE); + 
cnat_nfv9_template_info.bulk_add_outside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_add_outside_start_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_TYPE); + cnat_nfv9_template_info.bulk_add_outside_start_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_add_outside_end_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_END_FIELD_TYPE); + cnat_nfv9_template_info.bulk_add_outside_end_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_END_FIELD_SIZE); + + /* + * Create the bulk delete Template + */ + cnat_nfv9_template_info.bulk_del_template_id = + clib_host_to_net_u16(CNAT_NFV9_NAT44_BULK_DEL_TEMPLATE_ID); + cnat_nfv9_template_info.bulk_del_field_count = + clib_host_to_net_u16(CNAT_NFV9_NAT44_BULK_DEL_FIELD_COUNT); + + cnat_nfv9_template_info.bulk_del_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.bulk_del_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_del_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.bulk_del_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_del_outside_start_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_TYPE); + cnat_nfv9_template_info.bulk_del_outside_start_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_SIZE); + + /* + * Ds-lite bulk add template + */ + cnat_nfv9_template_info.bulk_dslite_add_template_id = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_BULK_ADD_TEMPLATE_ID); + cnat_nfv9_template_info.bulk_dslite_add_field_count = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_BULK_ADD_FIELD_COUNT); + + cnat_nfv9_template_info.bulk_dslite_add_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_add_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_add_outside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_add_outside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_add_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_add_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_add_inside_ipv6_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_add_inside_ipv6_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_add_outside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_add_outside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_add_outside_start_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_add_outside_start_port_field_size = + 
clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_add_outside_end_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_END_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_add_outside_end_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_END_FIELD_SIZE); + + /* + * Ds-lite bulk delete template + */ + + cnat_nfv9_template_info.bulk_dslite_del_template_id = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_BULK_DEL_TEMPLATE_ID); + cnat_nfv9_template_info.bulk_dslite_del_field_count = + clib_host_to_net_u16(CNAT_NFV9_DS_LITE_BULK_DEL_FIELD_COUNT); + + cnat_nfv9_template_info.bulk_dslite_del_inside_vrf_id_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_del_inside_vrf_id_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_del_inside_ip_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_del_inside_ip_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_del_inside_ipv6_addr_field_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_del_inside_ipv6_addr_field_size = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE); + + cnat_nfv9_template_info.bulk_dslite_del_outside_start_port_field_type = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_TYPE); + cnat_nfv9_template_info.bulk_dslite_del_outside_start_port_field_size = + clib_host_to_net_u16(CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_SIZE); + +#endif /* NO_BULK_LOGGING */ + + /* + * Ingress vrfid - name mapping + */ + CNAT_NFV9_OPTION_TEMPLATE.flowset_id = + clib_host_to_net_u16(CNAT_NFV9_OPTION_TEMPLATE_FLOWSET_ID); + CNAT_NFV9_OPTION_TEMPLATE.length = + clib_host_to_net_u16(CNAT_NFV9_OPTION_TEMPLATE_LENGTH); + + CNAT_NFV9_OPTION_TEMPLATE.ingress_vrfid_name_map_template_id = + clib_host_to_net_u16(CNAT_NFV9_INGRESS_VRF_ID_NAME_TEMPLATE_ID); + /* currently no scope field supported */ + CNAT_NFV9_OPTION_TEMPLATE.ingress_vrfid_name_map_scope_len = 0; + CNAT_NFV9_OPTION_TEMPLATE.ingress_vrfid_name_map_option_len = + clib_host_to_net_u16(CNAT_NFV9_INGRESS_VRF_ID_NAME_OPTION_LEN); + CNAT_NFV9_OPTION_TEMPLATE.ingress_vrfid_name_map_vrfid_option_type = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE); + CNAT_NFV9_OPTION_TEMPLATE.ingress_vrfid_name_map_vrfid_option_len = + clib_host_to_net_u16(CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE); + CNAT_NFV9_OPTION_TEMPLATE.ingress_vrfid_name_map_vrfname_option_type = + clib_host_to_net_u16(CNAT_NFV9_INGRESS_VRF_NAME_FIELD_TYPE); + CNAT_NFV9_OPTION_TEMPLATE.ingress_vrfid_name_map_vrfname_option_len = + clib_host_to_net_u16(CNAT_NFV9_INGRESS_VRF_NAME_FIELD_SIZE); + + /* + * Set the padding (which was added to make the size of template + * multiple of 4) to zero + */ + CNAT_NFV9_OPTION_TEMPLATE.padding1 = 0; +} + +/* + * one time function + * has to be called at the init time + */ +void cnat_nfv9_logging_init() +{ + if (!cnat_nfv9_global_info.cnat_nfv9_init_done) { + cnat_nfv9_template_init(); + + /* Pre allocate for NFV9_SERVER_POOL_SIZE. 
Will be good + * enough for most deployments + */ + pool_alloc(nfv9_server_info_pool, NFV9_SERVER_POOL_SIZE); + int i; + nfv9_server_info_t *server __attribute__((unused)); + for(i = 0; i < NFV9_SERVER_POOL_SIZE; i++) { + pool_get(nfv9_server_info_pool, server); + } + + for(i = 0; i < NFV9_SERVER_POOL_SIZE; i++) { + pool_put(nfv9_server_info_pool, nfv9_server_info_pool + i); + } + + memset(&cnat_nfv9_global_info, 0 , sizeof(cnat_nfv9_global_info_t)); + ASSERT(cnat_nfv9_global_info.cnat_nfv9_disp_node_index != (u16)~0); + + cnat_nfv9_global_info.cnat_nfv9_global_collector_index = EMPTY; + cnat_nfv9_global_info.cnat_nfv9_init_done = 1; + + /* + * src id is set to infra IPv4 address + octeon core number + */ + nfv9_src_id = my_instance_number; + } +} diff --git a/vnet/vnet/vcgn/cnat_logging.h b/vnet/vnet/vcgn/cnat_logging.h new file mode 100644 index 00000000000..7bd43ecf21e --- /dev/null +++ b/vnet/vnet/vcgn/cnat_logging.h @@ -0,0 +1,1091 @@ +/* + *------------------------------------------------------------------ + * cnat_logging.h + * + * Copyright (c) 2009, 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __CNAT_LOGGING_H__ +#define __CNAT_LOGGING_H__ + +#include <stdio.h> +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/clib.h> + +#include "nat64_db.h" +#include "cnat_log_common.h" +#include "dslite_defs.h" + +#define NFV9_DEF_PATH_MTU 1500 +#define NFV9_VRF_NAME_LEN 12 + +/* one time call at the beginning */ +void cnat_nfv9_logging_init(); + +/* + * unconditional call + * will check logging config inside + */ +void cnat_nfv9_log_mapping_create(cnat_main_db_entry_t * db, + cnat_vrfmap_t *vrfmap +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ); + +void cnat_nfv9_nat44_log_session_create(cnat_main_db_entry_t * db, + cnat_session_entry_t * sdb, + cnat_vrfmap_t *vrfmap); + +void cnat_nfv9_nat44_log_session_delete(cnat_main_db_entry_t * db, + cnat_session_entry_t * sdb, + cnat_vrfmap_t *vrfmap); + + +/* + * unconditional call + * will check logging config inside + */ +void cnat_nfv9_log_mapping_delete(cnat_main_db_entry_t * db, + cnat_vrfmap_t *vrfmap +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ); + +/* nat44 syslog APIs */ +void cnat_syslog_nat44_mapping_create(cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap, cnat_session_entry_t * sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ); + +void cnat_syslog_nat44_mapping_delete(cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap, cnat_session_entry_t *sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ); + +/* + * dslite + */ +void cnat_nfv9_ds_lite_mapping_create(cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ); + +void cnat_nfv9_ds_lite_mapping_delete(cnat_main_db_entry_t *db, + 
dslite_table_entry_t *dslite_entry +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ); +void cnat_nfv9_ds_lite_log_session_create(cnat_main_db_entry_t * db, + dslite_table_entry_t *dslite_entry, + cnat_session_entry_t * sdb); + +void cnat_nfv9_ds_lite_log_session_delete(cnat_main_db_entry_t * db, + dslite_table_entry_t *dslite_entry, + cnat_session_entry_t * sdb); + +/* + * nat64 + */ + +void cnat_nfv9_bib_mapping_create (nat64_bib_entry_t *db, + nat64_table_entry_t *nat64_entry); + +void cnat_nfv9_session_mapping_create (nat64_bib_entry_t *bdb, + nat64_session_entry_t *sdb, + nat64_table_entry_t *nat64_entry_ptr); + +void cnat_nfv9_bib_mapping_delete (nat64_bib_entry_t *db, + nat64_table_entry_t *nat64_entry); + +void cnat_nfv9_session_mapping_delete (nat64_bib_entry_t *bdb, + nat64_session_entry_t *sdb, + nat64_table_entry_t *nat64_entry_ptr); + +typedef enum { + RECORD_INVALID = 0, + NAT44_ADD_RECORD, + NAT44_DEL_RECORD, + NAT64_ADD_BIB_RECORD, + NAT64_DEL_BIB_RECORD, + NAT64_ADD_SESSION_RECORD, + NAT64_DEL_SESSION_RECORD, + DS_LITE_ADD_RECORD, + DS_LITE_DEL_RECORD, + NAT44_BULK_ADD_RECORD, + NAT44_BULK_DEL_RECORD, + DS_LITE_BULK_ADD_RECORD, + DS_LITE_BULK_DEL_RECORD, + INGRESS_VRF_ID_NAME_RECORD, + NAT44_ADD_SESSION_RECORD, + NAT44_DEL_SESSION_RECORD, + DS_LITE_ADD_SESSION_RECORD, + DS_LITE_DEL_SESSION_RECORD, + MAX_RECORDS +} netflow_record; + +typedef enum { + TEMPLATE_SENT_FALSE = 0, + TEMPLATE_SENT_TRUE = 1 +} netflow_template_sent; + +#define cnat_nfv9_get_sys_up_time_in_ms cnat_get_sys_up_time_in_ms + +#define cnat_nfv9_get_unix_time_in_seconds cnat_get_unix_time_in_seconds + +#define cnat_nfv9_dump_time_change_logs cnat_dump_time_change_logs + + +/* + * Netflow V9 Specific Defines and structures + */ + +#define CNAT_NFV9_VERSION_NUMBER 9 + +#define CNAT_NFV9_TEMPLATE_FLOWSET_ID 0 +#define CNAT_NFV9_OPTION_TEMPLATE_FLOWSET_ID 1 + +#define CNAT_NFV9_ADD_FIELD_COUNT 7 +#define CNAT_NFV9_DEL_FIELD_COUNT 4 +#define CNAT_NFV9_DS_LITE_ADD_FIELD_COUNT 8 +#define CNAT_NFV9_DS_LITE_DEL_FIELD_COUNT 5 +#define CNAT_NFV9_NAT64_ADD_BIB_FIELD_COUNT 5 +#define CNAT_NFV9_NAT64_DEL_BIB_FIELD_COUNT 3 +#define CNAT_NFV9_NAT64_ADD_SESSION_FIELD_COUNT 8 +#define CNAT_NFV9_NAT64_DEL_SESSION_FIELD_COUNT 5 +#define CNAT_NFV9_NAT44_ADD_SESSION_FIELD_COUNT 9 +#define CNAT_NFV9_NAT44_DEL_SESSION_FIELD_COUNT 6 +#define CNAT_NFV9_DS_LITE_ADD_SESSION_FIELD_COUNT 10 +#define CNAT_NFV9_DS_LITE_DEL_SESSION_FIELD_COUNT 7 + +#define CNAT_NFV9_ADD_TEMPLATE_ID 256 +#define CNAT_NFV9_DEL_TEMPLATE_ID 257 +#define CNAT_NFV9_NAT64_ADD_BIB_TEMPLATE_ID 258 +#define CNAT_NFV9_NAT64_DEL_BIB_TEMPLATE_ID 259 +#define CNAT_NFV9_NAT64_ADD_SESSION_TEMPLATE_ID 260 +#define CNAT_NFV9_NAT64_DEL_SESSION_TEMPLATE_ID 261 +#define CNAT_NFV9_INGRESS_VRF_ID_NAME_TEMPLATE_ID 262 +#define CNAT_NFV9_DS_LITE_ADD_TEMPLATE_ID 267 +#define CNAT_NFV9_DS_LITE_DEL_TEMPLATE_ID 268 +#define CNAT_NFV9_NAT44_ADD_SESSION_TEMPLATE_ID 271 +#define CNAT_NFV9_NAT44_DEL_SESSION_TEMPLATE_ID 272 +#define CNAT_NFV9_DS_LITE_ADD_SESSION_TEMPLATE_ID 273 +#define CNAT_NFV9_DS_LITE_DEL_SESSION_TEMPLATE_ID 274 + +#ifndef NO_BULK_LOGGING +#define CNAT_NFV9_NAT44_BULK_ADD_TEMPLATE_ID 265 +#define CNAT_NFV9_NAT44_BULK_DEL_TEMPLATE_ID 266 +#define CNAT_NFV9_DS_LITE_BULK_ADD_TEMPLATE_ID 269 +#define CNAT_NFV9_DS_LITE_BULK_DEL_TEMPLATE_ID 270 + +#define CNAT_NFV9_NAT44_BULK_ADD_FIELD_COUNT 6 +#define CNAT_NFV9_NAT44_BULK_DEL_FIELD_COUNT 3 +#define CNAT_NFV9_DS_LITE_BULK_ADD_FIELD_COUNT 7 +#define CNAT_NFV9_DS_LITE_BULK_DEL_FIELD_COUNT 4 + +#define 
CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_TYPE 361 +#define CNAT_NFV9_OUTSIDE_IP_PORT_START_FIELD_SIZE 2 + +#define CNAT_NFV9_OUTSIDE_IP_PORT_END_FIELD_TYPE 362 +#define CNAT_NFV9_OUTSIDE_IP_PORT_END_FIELD_SIZE 2 + +#endif /* #ifndef NO_BULK_LOGGING */ + +#define CNAT_NFV9_INGRESS_VRF_NAME_FIELD_TYPE 236 +#define CNAT_NFV9_INGRESS_VRF_NAME_FIELD_SIZE 12 +/* 4 byte for vrf_id + 4 byte for vrf_name (option fields) */ +#define CNAT_NFV9_INGRESS_VRF_ID_NAME_OPTION_LEN 8 +extern u16 cnat_template_id[MAX_RECORDS]; + +#define CNAT_NFV9_INSIDE_VRFID_FIELD_TYPE 234 +#define CNAT_NFV9_INSIDE_VRFID_FIELD_SIZE 4 + +#define CNAT_NFV9_OUTSIDE_VRFID_FIELD_TYPE 235 +#define CNAT_NFV9_OUTSIDE_VRFID_FIELD_SIZE 4 + +#define CNAT_NFV9_INSIDE_IP_ADDR_FIELD_TYPE 8 +#define CNAT_NFV9_INSIDE_IP_ADDR_FIELD_SIZE 4 + +#define CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_TYPE 225 +#define CNAT_NFV9_OUTSIDE_IP_ADDR_FIELD_SIZE 4 + +#define CNAT_NFV9_INSIDE_IP_PORT_FIELD_TYPE 7 +#define CNAT_NFV9_INSIDE_IP_PORT_FIELD_SIZE 2 + +#define CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_TYPE 227 +#define CNAT_NFV9_OUTSIDE_IP_PORT_FIELD_SIZE 2 + +#define CNAT_NFV9_PROTOCOL_FIELD_TYPE 4 +#define CNAT_NFV9_PROTOCOL_FIELD_SIZE 1 + +/* IPv6 related info */ + +#define CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_TYPE 27 +#define CNAT_NFV9_INSIDE_IPV6_SRC_ADDR_FIELD_SIZE 16 + +#define CNAT_NFV9_INSIDE_IPV6_DST_ADDR_FIELD_TYPE 28 +#define CNAT_NFV9_INSIDE_IPV6_DST_ADDR_FIELD_SIZE 16 + +#define CNAT_NFV9_OUTSIDE_IP_DST_ADDR_FIELD_TYPE 226 +#define CNAT_NFV9_OUTSIDE_IP_DST_ADDR_FIELD_SIZE 4 + +#define CNAT_NFV9_INSIDE_DST_PORT_FIELD_TYPE 11 +#define CNAT_NFV9_INSIDE_DST_PORT_FIELD_SIZE 2 + +#define CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_TYPE 12 +#define CNAT_NFV9_DESTINATION_IP_ADDR_FIELD_SIZE 4 + + +typedef struct { + u16 version; + u16 count; + u32 sys_up_time; /* time in ms since system was booted */ + u32 timestamp; /* UNIX time in seconds since 1970 */ + u32 sequence_num; + u32 source_id; +} nfv9_header_t; + +/* + * Hardcoded - need to be fixed + */ +#define CNAT_NFV9_SOURCE_ID_VALUE 0x1234 + +typedef struct { + u16 flowset_id; + u16 length; + + u16 ingress_vrfid_name_map_template_id; + u16 ingress_vrfid_name_map_scope_len; + u16 ingress_vrfid_name_map_option_len; + u16 ingress_vrfid_name_map_vrfid_option_type; + u16 ingress_vrfid_name_map_vrfid_option_len; + u16 ingress_vrfid_name_map_vrfname_option_type; + u16 ingress_vrfid_name_map_vrfname_option_len; + /* + * Adding the padding so as to make the tempalate + * structure end on a 4 byte boundary + */ + u16 padding1; + +} cnat_nfv9_option_template_t; + +/* + * The following structure defines the Netflow Template that + * will be exported to the Netflow Collector + */ + +typedef struct { + u16 flowset_id; + u16 length; + + u16 add_template_id; + u16 add_field_count; + u16 add_inside_vrf_id_field_type; + u16 add_inside_vrf_id_field_size; + u16 add_outside_vrf_id_field_type; + u16 add_outside_vrf_id_field_size; + u16 add_inside_ip_addr_field_type; + u16 add_inside_ip_addr_field_size; + u16 add_outside_ip_addr_field_type; + u16 add_outside_ip_addr_field_size; + u16 add_inside_ip_port_field_type; + u16 add_inside_ip_port_field_size; + u16 add_outside_ip_port_field_type; + u16 add_outside_ip_port_field_size; + u16 add_protocol_field_type; + u16 add_protocol_field_size; + + u16 del_template_id; + u16 del_field_count; + u16 del_inside_vrf_id_field_type; + u16 del_inside_vrf_id_field_size; + u16 del_inside_ip_addr_field_type; + u16 del_inside_ip_addr_field_size; + u16 del_inside_ip_port_field_type; + u16 
del_inside_ip_port_field_size; + u16 del_protocol_field_type; + u16 del_protocol_field_size; +#if 0 + /* NAT64 related info */ + u16 nat64_add_bib_template_id; + u16 nat64_add_bib_field_count; + u16 nat64_add_bib_inside_ipv6_addr_field_type; + u16 nat64_add_bib_inside_ipv6_addr_field_size; + u16 nat64_add_bib_outside_ip_addr_field_type; + u16 nat64_add_bib_outside_ip_addr_field_size; + u16 nat64_add_bib_inside_ip_port_field_type; + u16 nat64_add_bib_inside_ip_port_field_size; + u16 nat64_add_bib_outside_ip_port_field_type; + u16 nat64_add_bib_outside_ip_port_field_size; + u16 nat64_add_bib_protocol_field_type; + u16 nat64_add_bib_protocol_field_size; + + u16 nat64_del_bib_template_id; + u16 nat64_del_bib_field_count; + u16 nat64_del_bib_inside_ip_addr_field_type; + u16 nat64_del_bib_inside_ip_addr_field_size; + u16 nat64_del_bib_inside_ip_port_field_type; + u16 nat64_del_bib_inside_ip_port_field_size; + u16 nat64_del_bib_protocol_field_type; + u16 nat64_del_bib_protocol_field_size; + + + u16 nat64_add_session_template_id; + u16 nat64_add_session_field_count; + u16 nat64_add_session_inside_ipv6_src_addr_field_type; + u16 nat64_add_session_inside_ipv6_src_addr_field_size; + u16 nat64_add_session_outside_ip_src_addr_field_type; + u16 nat64_add_session_outside_ip_src_addr_field_size; + u16 nat64_add_session_inside_ipv6_dst_addr_field_type; + u16 nat64_add_session_inside_ipv6_dst_addr_field_size; + u16 nat64_add_session_outside_ip_dst_addr_field_type; + u16 nat64_add_session_outside_ip_dst_addr_field_size; + u16 nat64_add_session_inside_ip_src_port_field_type; + u16 nat64_add_session_inside_ip_src_port_field_size; + u16 nat64_add_session_outside_ip_src_port_field_type; + u16 nat64_add_session_outside_ip_src_port_field_size; + u16 nat64_add_session_ip_dest_port_field_type; + u16 nat64_add_session_ip_dest_port_field_size; + u16 nat64_add_session_protocol_field_type; + u16 nat64_add_session_protocol_field_size; + + u16 nat64_del_session_template_id; + u16 nat64_del_session_field_count; + u16 nat64_del_session_inside_ip_src_addr_field_type; + u16 nat64_del_session_inside_ip_src_addr_field_size; + u16 nat64_del_session_inside_ip_dst_addr_field_type; + u16 nat64_del_session_inside_ip_dst_addr_field_size; + u16 nat64_del_session_inside_ip_src_port_field_type; + u16 nat64_del_session_inside_ip_src_port_field_size; + u16 nat64_del_session_inside_ip_dst_port_field_type; + u16 nat64_del_session_inside_ip_dst_port_field_size; + u16 nat64_del_session_protocol_field_type; + u16 nat64_del_session_protocol_field_size; + + /* + * Ds-Lite specific info + */ + u16 add_dslite_template_id; + u16 add_dslite_field_count; + u16 add_dslite_inside_vrf_id_field_type; + u16 add_dslite_inside_vrf_id_field_size; + u16 add_dslite_outside_vrf_id_field_type; + u16 add_dslite_outside_vrf_id_field_size; + u16 add_dslite_inside_ip_addr_field_type; + u16 add_dslite_inside_ip_addr_field_size; + u16 add_dslite_inside_ipv6_addr_field_type; + u16 add_dslite_inside_ipv6_addr_field_size; + u16 add_dslite_outside_ip_addr_field_type; + u16 add_dslite_outside_ip_addr_field_size; + u16 add_dslite_inside_ip_port_field_type; + u16 add_dslite_inside_ip_port_field_size; + u16 add_dslite_outside_ip_port_field_type; + u16 add_dslite_outside_ip_port_field_size; + u16 add_dslite_protocol_field_type; + u16 add_dslite_protocol_field_size; + + u16 del_dslite_template_id; + u16 del_dslite_field_count; + u16 del_dslite_inside_vrf_id_field_type; + u16 del_dslite_inside_vrf_id_field_size; + u16 del_dslite_inside_ip_addr_field_type; + u16 
del_dslite_inside_ip_addr_field_size; + u16 del_dslite_inside_ipv6_addr_field_type; + u16 del_dslite_inside_ipv6_addr_field_size; + u16 del_dslite_inside_ip_port_field_type; + u16 del_dslite_inside_ip_port_field_size; + u16 del_dslite_protocol_field_type; + u16 del_dslite_protocol_field_size; +#endif + +//#ifndef NO_BULK_LOGGING /* commenting for time being */ +#if 0 + u16 bulk_add_template_id; + u16 bulk_add_field_count; + u16 bulk_add_inside_vrf_id_field_type; + u16 bulk_add_inside_vrf_id_field_size; + u16 bulk_add_outside_vrf_id_field_type; + u16 bulk_add_outside_vrf_id_field_size; + u16 bulk_add_inside_ip_addr_field_type; + u16 bulk_add_inside_ip_addr_field_size; + u16 bulk_add_outside_ip_addr_field_type; + u16 bulk_add_outside_ip_addr_field_size; + u16 bulk_add_outside_start_port_field_type; + u16 bulk_add_outside_start_port_field_size; + u16 bulk_add_outside_end_port_field_type; + u16 bulk_add_outside_end_port_field_size; + + u16 bulk_del_template_id; + u16 bulk_del_field_count; + u16 bulk_del_inside_vrf_id_field_type; + u16 bulk_del_inside_vrf_id_field_size; + u16 bulk_del_inside_ip_addr_field_type; + u16 bulk_del_inside_ip_addr_field_size; + u16 bulk_del_outside_start_port_field_type; + u16 bulk_del_outside_start_port_field_size; + + /* ds-lite bulk logging create delete event */ + + u16 bulk_dslite_add_template_id; + u16 bulk_dslite_add_field_count; + u16 bulk_dslite_add_inside_vrf_id_field_type; + u16 bulk_dslite_add_inside_vrf_id_field_size; + u16 bulk_dslite_add_outside_vrf_id_field_type; + u16 bulk_dslite_add_outside_vrf_id_field_size; + u16 bulk_dslite_add_inside_ip_addr_field_type; + u16 bulk_dslite_add_inside_ip_addr_field_size; + u16 bulk_dslite_add_inside_ipv6_addr_field_type; + u16 bulk_dslite_add_inside_ipv6_addr_field_size; + u16 bulk_dslite_add_outside_ip_addr_field_type; + u16 bulk_dslite_add_outside_ip_addr_field_size; + u16 bulk_dslite_add_outside_start_port_field_type; + u16 bulk_dslite_add_outside_start_port_field_size; + u16 bulk_dslite_add_outside_end_port_field_type; + u16 bulk_dslite_add_outside_end_port_field_size; + + u16 bulk_dslite_del_template_id; + u16 bulk_dslite_del_field_count; + u16 bulk_dslite_del_inside_vrf_id_field_type; + u16 bulk_dslite_del_inside_vrf_id_field_size; + u16 bulk_dslite_del_inside_ip_addr_field_type; + u16 bulk_dslite_del_inside_ip_addr_field_size; + u16 bulk_dslite_del_inside_ipv6_addr_field_type; + u16 bulk_dslite_del_inside_ipv6_addr_field_size; + u16 bulk_dslite_del_outside_start_port_field_type; + u16 bulk_dslite_del_outside_start_port_field_size; + +#endif /* NO_BULK_LOGGING */ + + u16 nat44_session_add_template_id; + u16 nat44_session_add_field_count; + u16 nat44_session_add_inside_vrf_id_field_type; + u16 nat44_session_add_inside_vrf_id_field_size; + u16 nat44_session_add_outside_vrf_id_field_type; + u16 nat44_session_add_outside_vrf_id_field_size; + u16 nat44_session_add_inside_ip_addr_field_type; + u16 nat44_session_add_inside_ip_addr_field_size; + u16 nat44_session_add_outside_ip_addr_field_type; + u16 nat44_session_add_outside_ip_addr_field_size; + u16 nat44_session_add_inside_ip_port_field_type; + u16 nat44_session_add_inside_ip_port_field_size; + u16 nat44_session_add_outside_ip_port_field_type; + u16 nat44_session_add_outside_ip_port_field_size; + u16 nat44_session_add_dest_ip_addr_field_type; + u16 nat44_session_add_dest_ip_addr_field_size; + u16 nat44_session_add_dest_port_field_type; + u16 nat44_session_add_dest_port_field_size; + u16 nat44_session_add_protocol_field_type; + u16 
nat44_session_add_protocol_field_size; + + u16 nat44_session_del_template_id; + u16 nat44_session_del_field_count; + u16 nat44_session_del_inside_vrf_id_field_type; + u16 nat44_session_del_inside_vrf_id_field_size; + u16 nat44_session_del_inside_ip_addr_field_type; + u16 nat44_session_del_inside_ip_addr_field_size; + u16 nat44_session_del_dest_ip_addr_field_type; + u16 nat44_session_del_dest_ip_addr_field_size; + u16 nat44_session_del_inside_ip_port_field_type; + u16 nat44_session_del_inside_ip_port_field_size; + u16 nat44_session_del_dest_port_field_type; + u16 nat44_session_del_dest_port_field_size; + u16 nat44_session_del_protocol_field_type; + u16 nat44_session_del_protocol_field_size; + +#if 0 + u16 add_dslite_session_template_id; + u16 add_dslite_session_field_count; + u16 add_dslite_session_inside_vrf_id_field_type; + u16 add_dslite_session_inside_vrf_id_field_size; + u16 add_dslite_session_outside_vrf_id_field_type; + u16 add_dslite_session_outside_vrf_id_field_size; + u16 add_dslite_session_inside_ip_addr_field_type; + u16 add_dslite_session_inside_ip_addr_field_size; + u16 add_dslite_session_inside_ipv6_addr_field_type; + u16 add_dslite_session_inside_ipv6_addr_field_size; + u16 add_dslite_session_outside_ip_addr_field_type; + u16 add_dslite_session_outside_ip_addr_field_size; + u16 add_dslite_session_inside_ip_port_field_type; + u16 add_dslite_session_inside_ip_port_field_size; + u16 add_dslite_session_outside_ip_port_field_type; + u16 add_dslite_session_outside_ip_port_field_size; + u16 add_dslite_session_dest_ip_addr_field_type; + u16 add_dslite_session_dest_ip_addr_field_size; + u16 add_dslite_session_dest_port_field_type; + u16 add_dslite_session_dest_port_field_size; + u16 add_dslite_session_protocol_field_type; + u16 add_dslite_session_protocol_field_size; + + u16 del_dslite_session_template_id; + u16 del_dslite_session_field_count; + u16 del_dslite_session_inside_vrf_id_field_type; + u16 del_dslite_session_inside_vrf_id_field_size; + u16 del_dslite_session_inside_ip_addr_field_type; + u16 del_dslite_session_inside_ip_addr_field_size; + u16 del_dslite_session_inside_ipv6_addr_field_type; + u16 del_dslite_session_inside_ipv6_addr_field_size; + u16 del_dslite_session_dest_ip_addr_field_type; + u16 del_dslite_session_dest_ip_addr_field_size; + u16 del_dslite_session_inside_ip_port_field_type; + u16 del_dslite_session_inside_ip_port_field_size; + u16 del_dslite_session_dest_port_field_type; + u16 del_dslite_session_dest_port_field_size; + u16 del_dslite_session_protocol_field_type; + u16 del_dslite_session_protocol_field_size; +#endif + + /* + * Netflow option template + * Ingress VRF ID - Name mapping + * This template will be sent under flowset id 1 + */ + cnat_nfv9_option_template_t cnat_nfv9_option_template; +} cnat_nfv9_template_t; + +/* + * The Dataflow header for each add/delete record group + */ +typedef struct { + u16 dataflow_template_id; + u16 dataflow_length; +} nfv9_dataflow_record_header_t; + +/* + * NFv9 Add record definition + */ + +/* + * pad bytes needed to make the structure a multiple of 4 bytes + */ +#define CNAT_NFV9_ADD_RECORD_PAD_BYTES (3) +#define CNAT_NFV9_DEL_RECORD_PAD_BYTES (1) + +#define CNAT_NFV9_NAT64_ADD_BIB_RECORD_PAD_BYTES (3) +#define CNAT_NFV9_NAT64_DEL_BIB_RECORD_PAD_BYTES (1) +#define CNAT_NFV9_NAT64_ADD_SESSION_RECORD_PAD_BYTES (1) +#define CNAT_NFV9_NAT64_DEL_SESSION_RECORD_PAD_BYTES (3) +#define CNAT_NFV9_NAT44_ADD_SESSION_RECORD_PAD_BYTES (1) +#define CNAT_NFV9_NAT44_DEL_SESSION_RECORD_PAD_BYTES (3) + +#define 
CNAT_NFV9_DS_LITE_ADD_RECORD_PAD_BYTES (3) +#define CNAT_NFV9_DS_LITE_DEL_RECORD_PAD_BYTES (1) +#define CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_PAD_BYTES (1) +#define CNAT_NFV9_DS_LITE_DEL_SESSION_RECORD_PAD_BYTES (3) + +#define CNAT_NFV9_INGRESS_VRFID_NAME_RECORD_PAD_BYTES (0) + +typedef struct { + u32 inside_vrf_id; + u32 outside_vrf_id; + u32 inside_ip_addr; + u32 outside_ip_addr; + u16 inside_ip_port; + u16 outside_ip_port; + u8 protocol; + u8 pad[CNAT_NFV9_ADD_RECORD_PAD_BYTES]; +} nfv9_add_record_t; + +/* + * NFv9 Delete record definition + */ +typedef struct { + u32 inside_vrf_id; + u32 inside_ip_addr; + u16 inside_ip_port; + u8 protocol; + u8 pad[CNAT_NFV9_DEL_RECORD_PAD_BYTES]; +} nfv9_del_record_t; + +#ifndef NO_BULK_LOGGING + +#define CNAT_NFV9_BULK_ADD_RECORD_PAD_BYTES (0) +#define CNAT_NFV9_BULK_DEL_RECORD_PAD_BYTES (2) + +typedef struct { + u32 inside_vrf_id; + u32 outside_vrf_id; + u32 inside_ip_addr; + u32 outside_ip_addr; + u16 outside_ip_port_start; + u16 outside_ip_port_end; + u8 pad[CNAT_NFV9_BULK_ADD_RECORD_PAD_BYTES]; +} nfv9_bulk_add_record_t; + +/* + * NFv9 Delete record definition + */ +typedef struct { + u32 inside_vrf_id; + u32 inside_ip_addr; + u16 outside_ip_port_start; + u8 pad[CNAT_NFV9_BULK_DEL_RECORD_PAD_BYTES]; +} nfv9_bulk_del_record_t; + +/* + * DS-lite bulk port (user based) add record definition + */ + +#define CNAT_NFV9_DS_LITE_BULK_ADD_RECORD_PAD_BYTES (0) +#define CNAT_NFV9_DS_LITE_BULK_DEL_RECORD_PAD_BYTES (2) + +typedef struct { + u32 inside_vrf_id; + u32 outside_vrf_id; + u32 inside_ip_addr; + u32 inside_v6_src_addr[4]; + u32 outside_ip_addr; + u16 outside_ip_port_start; + u16 outside_ip_port_end; + u8 pad[CNAT_NFV9_DS_LITE_BULK_ADD_RECORD_PAD_BYTES]; +} nfv9_ds_lite_bulk_add_record_t; + + +/* + * DS-lite bulk port (user based) delete record definition + */ + +typedef struct { + u32 inside_vrf_id; + u32 inside_ip_addr; + u32 inside_v6_src_addr[4]; + u16 outside_ip_port_start; + u8 pad[CNAT_NFV9_DS_LITE_BULK_DEL_RECORD_PAD_BYTES]; +} nfv9_ds_lite_bulk_del_record_t; + +#endif /* NO_BULK_LOGGING */ + +/* NAT64 related structures */ + +typedef struct { + u32 inside_v6_src_addr[4]; + u32 outside_v4_src_addr; + u16 inside_src_port; + u16 outside_src_port; + u8 protocol; + u8 pad[CNAT_NFV9_NAT64_ADD_BIB_RECORD_PAD_BYTES]; +} nfv9_nat64_add_bib_record_t; + + +typedef struct { + u32 inside_v6_src_addr[4]; + u32 outside_v4_src_addr; + u32 inside_v6_dest_addr[4]; + u32 outside_v4_dest_addr; + u16 inside_src_port; + u16 outside_src_port; + u16 dest_port; + u8 protocol; + u8 pad[CNAT_NFV9_NAT64_ADD_SESSION_RECORD_PAD_BYTES]; +} nfv9_nat64_add_session_record_t; + + +typedef struct { + u32 inside_v6_src_addr[4]; + u16 inside_src_port; + u8 protocol; + u8 pad[CNAT_NFV9_NAT64_DEL_BIB_RECORD_PAD_BYTES]; +} nfv9_nat64_del_bib_record_t; + + +typedef struct { + u32 inside_v6_src_addr[4]; + u32 inside_v6_dest_addr[4]; + u16 inside_src_port; + u16 dest_port; + u8 protocol; + u8 pad[CNAT_NFV9_NAT64_DEL_SESSION_RECORD_PAD_BYTES]; +} nfv9_nat64_del_session_record_t; + +/* + * NFv9 Session based Add record definition + */ +typedef struct { + u32 inside_vrf_id; + u32 outside_vrf_id; + u32 inside_ip_addr; + u32 outside_ip_addr; + u16 inside_ip_port; + u16 outside_ip_port; + u32 dest_ip_addr; + u16 dest_port; + u8 protocol; + u8 pad[CNAT_NFV9_NAT44_ADD_SESSION_RECORD_PAD_BYTES]; +} nfv9_add_session_record_t; + +/* + * NFv9 Session based del record definition + */ +typedef struct { + u32 inside_vrf_id; + u32 inside_ip_addr; + u32 dest_ip_addr; + u16 inside_ip_port; + u16 
dest_port; + u8 protocol; + u8 pad[CNAT_NFV9_NAT44_DEL_SESSION_RECORD_PAD_BYTES]; +} nfv9_del_session_record_t; + +/* + * DS-lite NFv9 create record structure + */ +typedef struct { + u32 inside_vrf_id; + u32 outside_vrf_id; + u32 inside_ip_addr; + u32 inside_v6_src_addr[4]; + u32 outside_ip_addr; + u16 inside_ip_port; + u16 outside_ip_port; + u8 protocol; + u8 pad[CNAT_NFV9_DS_LITE_ADD_RECORD_PAD_BYTES]; +} nfv9_ds_lite_add_record_t; + +typedef struct { + u32 inside_vrf_id; + u32 inside_ip_addr; + u32 inside_v6_src_addr[4]; + u16 inside_ip_port; + u8 protocol; + u8 pad[CNAT_NFV9_DS_LITE_DEL_RECORD_PAD_BYTES]; +} nfv9_ds_lite_del_record_t; + +/* + * NFv9 Session based Add record definition + */ +typedef struct { + u32 inside_vrf_id; + u32 outside_vrf_id; + u32 inside_ip_addr; + u32 inside_v6_src_addr[4]; + u32 outside_ip_addr; + u16 inside_ip_port; + u16 outside_ip_port; + u32 dest_ip_addr; + u16 dest_port; + u8 protocol; + u8 pad[CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_PAD_BYTES]; +} nfv9_ds_lite_add_session_record_t; + +/* + * NFv9 Session based del record definition + */ +typedef struct { + u32 inside_vrf_id; + u32 inside_ip_addr; + u32 inside_v6_src_addr[4]; + u32 dest_ip_addr; + u16 inside_ip_port; + u16 dest_port; + u8 protocol; + u8 pad[CNAT_NFV9_DS_LITE_DEL_SESSION_RECORD_PAD_BYTES]; +} nfv9_ds_lite_del_session_record_t; + + +typedef struct { + u32 ingress_vrf_id; + u8 ingress_vrf_name[NFV9_VRF_NAME_LEN]; + u8 pad[CNAT_NFV9_INGRESS_VRFID_NAME_RECORD_PAD_BYTES]; +} nfv9_ingress_vrfid_name_record_t; + +#define CNAT_NFV9_TEMPLATE_OFFSET \ + (CNAT_NFV9_HDR_OFFSET + sizeof(nfv9_header_t)) + +#define CNAT_NFV9_TEMPLATE_LENGTH (sizeof(cnat_nfv9_template_t)) +#define CNAT_NFV9_OPTION_TEMPLATE_LENGTH (sizeof(cnat_nfv9_option_template_t)) + +#define CNAT_NFV9_DATAFLOW_RECORD_HEADER_LENGTH \ + (sizeof(nfv9_dataflow_record_header_t)) + +/* + * No padding is needed for the add/delete records - reduce padding bytes + */ + +#define CNAT_NFV9_ADD_RECORD_LENGTH (sizeof(nfv9_add_record_t) - \ + CNAT_NFV9_ADD_RECORD_PAD_BYTES) + +#define CNAT_NFV9_DEL_RECORD_LENGTH (sizeof(nfv9_del_record_t) - \ + CNAT_NFV9_DEL_RECORD_PAD_BYTES) + +#define CNAT_NFV9_DS_LITE_ADD_RECORD_LENGTH (sizeof(nfv9_ds_lite_add_record_t) - \ + CNAT_NFV9_DS_LITE_ADD_RECORD_PAD_BYTES) +#define CNAT_NFV9_DS_LITE_DEL_RECORD_LENGTH (sizeof(nfv9_ds_lite_del_record_t) - \ + CNAT_NFV9_DS_LITE_DEL_RECORD_PAD_BYTES) +#ifndef NO_BULK_LOGGING +#define CNAT_NFV9_BULK_ADD_RECORD_LENGTH (sizeof(nfv9_bulk_add_record_t) - \ + CNAT_NFV9_BULK_ADD_RECORD_PAD_BYTES) +#define CNAT_NFV9_BULK_DEL_RECORD_LENGTH (sizeof(nfv9_bulk_del_record_t) - \ + CNAT_NFV9_BULK_DEL_RECORD_PAD_BYTES) + +#define CNAT_NFV9_DS_LITE_BULK_ADD_RECORD_LENGTH (sizeof(nfv9_ds_lite_bulk_add_record_t) - \ + CNAT_NFV9_DS_LITE_BULK_ADD_RECORD_PAD_BYTES) +#define CNAT_NFV9_DS_LITE_BULK_DEL_RECORD_LENGTH (sizeof(nfv9_ds_lite_bulk_del_record_t) - \ + CNAT_NFV9_DS_LITE_BULK_DEL_RECORD_PAD_BYTES) + + +#endif /* NO_BULK_LOGGING */ + +#define CNAT_NFV9_INGRESS_VRFID_NAME_RECORD_LENGTH (sizeof(nfv9_ingress_vrfid_name_record_t) - \ + CNAT_NFV9_INGRESS_VRFID_NAME_RECORD_PAD_BYTES) + +#define CNAT_NFV9_NAT64_ADD_BIB_RECORD_LENGTH \ + (sizeof(nfv9_nat64_add_bib_record_t) - \ + CNAT_NFV9_NAT64_ADD_BIB_RECORD_PAD_BYTES) + +#define CNAT_NFV9_NAT64_DEL_BIB_RECORD_LENGTH \ + (sizeof(nfv9_nat64_del_bib_record_t) - \ + CNAT_NFV9_NAT64_DEL_BIB_RECORD_PAD_BYTES) + +#define CNAT_NFV9_NAT64_ADD_SESSION_RECORD_LENGTH \ + (sizeof(nfv9_nat64_add_session_record_t) - \ + 
CNAT_NFV9_NAT64_ADD_SESSION_RECORD_PAD_BYTES) + +#define CNAT_NFV9_NAT64_DEL_SESSION_RECORD_LENGTH \ + (sizeof(nfv9_nat64_del_session_record_t) - \ + CNAT_NFV9_NAT64_DEL_SESSION_RECORD_PAD_BYTES) + +#define CNAT_NFV9_MAX_SINGLE_RECORD_LENGTH \ + (sizeof(nfv9_ds_lite_add_session_record_t) - \ + CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_PAD_BYTES) + +#define CNAT_NFV9_NAT44_ADD_SESSION_RECORD_LENGTH \ + (sizeof(nfv9_add_session_record_t) -\ + CNAT_NFV9_NAT44_ADD_SESSION_RECORD_PAD_BYTES) + +#define CNAT_NFV9_NAT44_DEL_SESSION_RECORD_LENGTH \ + (sizeof(nfv9_del_session_record_t) -\ + CNAT_NFV9_NAT44_DEL_SESSION_RECORD_PAD_BYTES) + +#define CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_LENGTH \ + (sizeof(nfv9_ds_lite_add_session_record_t) -\ + CNAT_NFV9_DS_LITE_ADD_SESSION_RECORD_PAD_BYTES) + +#define CNAT_NFV9_DS_LITE_DEL_SESSION_RECORD_LENGTH \ + (sizeof(nfv9_ds_lite_del_session_record_t) -\ + CNAT_NFV9_DS_LITE_DEL_SESSION_RECORD_PAD_BYTES) + +/* + * Minimum value of the path MTU value + */ +#define CNAT_NFV9_MIN_RECORD_SIZE (60 + \ + CNAT_NFV9_DATAFLOW_RECORD_HEADER_LENGTH + \ + CNAT_NFV9_TEMPLATE_LENGTH + \ + CNAT_NFV9_MAX_SINGLE_RECORD_LENGTH) + +/* + * Let us put the maximum length of the netflow data to be 1400 + */ +#define CNAT_NFV9_MAX_PKT_LENGTH 1400 + +/* + * Data structures and defines to store NFV9 specific info + */ +#define CNAT_NFV9_INVALID_LOGGING_INDEX 0xffffffff + +/* + * Padding value between ADD and DELETE records. This can be atmost 3 bytes + */ +#define NFV9_PAD_VALUE (3) + +typedef struct { + /* NFV9 server specific info + * For now, it will maintain only package sequence count. + * Later it will maintain server address, port, etc. + * Though it currently has server address and port, it is only for + * cross refernce + */ + u32 ipv4_address; /* Destination IP address of the collector */ + u16 port; /* Destination port number of the collector */ + u16 refresh_rate; /* Refresh rate in packets after which template is sent */ + u16 timeout_rate; /* Timeout rate in seconds after which template is sent */ + u16 ref_count; /* Num of instances using this data */ + u32 sequence_num; /* Sequence number of the logging packet */ + /* + * Keep track of the time and packets since last template send + */ + u32 last_template_sent_time; + u32 pkts_since_last_template; + u8 template_sent; /* used while sending vrfid-name mapping */ + +} nfv9_server_info_t; + +/* + * This structure store the Netflow Logging information on per NFv9 + * collector basis. This structure is allocated from a pool and index + * to this structure is stored VRF MAP structures + */ +typedef struct { + /* + * nat64_id will be 0 for nat44 config and i_vrf_id, i_vrf will be 0 + * for nat64 config. 
Nat64_id will be used while nat64 collector is + * search and i_vrf* for nat44 collector + */ + /* Similarly for ds_lite, ds_lite_id will be used and nat64_id, + * ivrf_id shall be set to 0 + */ + u32 i_vrf_id; /* Inside VRF ID corresponding to this collector */ + u16 i_vrf; /* Inside VRF (uidb_index) corresponding to this collector */ + u16 nat64_id; /* NAT64 instance for to this collector */ + u16 ds_lite_id; /* DS Lite instance for this collector */ + + /* + * This field determines the maximum size of the Netflow V9 information + * that can be stored in a logging packet + */ + u16 max_length_minus_max_record_size; + + /* + * Indicates if the entry is already deleted + */ + u16 deleted; + + u16 pkt_length; /* Length of the currently NFv9 information */ + u16 record_length[MAX_RECORDS]; /* Length of delete record */ + u16 total_record_count; /* Total number of records including templates */ + + u8 logging_policy; + + /* + * Keep track of the time and packets since last template send + */ + u32 last_template_sent_time; + u32 pkts_since_last_template; + + /* Server info */ + u32 server_index; + + /* + * current logging context + */ + vlib_buffer_t *current_logging_context; + + /* + * Timestamp in UNIX seconds corresponding to when the current + * logging packet was created + */ + u32 current_logging_context_timestamp; + + /* + * Queued logging context waiting to be sent to the l3 infra node + */ + vlib_buffer_t *queued_logging_context; + + /* + * Headers corresponding to various records in this + * current nfv9 logging context + */ + nfv9_header_t *nfv9_header; + cnat_nfv9_template_t *nfv9_template_header; + nfv9_dataflow_record_header_t *dataflow_header; + u8 *record[MAX_RECORDS]; + u8 *next_data_ptr; + u8 last_record; + u32 nfv9_logging_next_index; + u32 ip4_input_node_index; + vlib_frame_t *f; + u32 *to_next; +} cnat_nfv9_logging_info_t; + + +/* + * Global structure for CGN APP configuration + */ +typedef struct { + /* + * Global NFv9 Logging Collector Index + */ + u32 cnat_nfv9_global_collector_index; + + /* + * Node index corresponding to the infra L3 output node + * to which the nfv9 logging node will send the packet + */ + u16 cnat_nfv9_disp_node_index; + + /* + * Whether we have initialized the NFv9 information + */ + u8 cnat_nfv9_init_done; +} cnat_nfv9_global_info_t; + +typedef enum { + cnat_nfv9_template_add_default, + cnat_nfv9_template_add_always +} cnat_nfv9_template_add_flag_t; + +extern cnat_nfv9_template_t cnat_nfv9_template_info; + +extern cnat_nfv9_logging_info_t cnat_default_nfv9_logging_info; +extern cnat_nfv9_logging_info_t *cnat_nfv9_logging_info_pool; + +extern cnat_nfv9_global_info_t cnat_nfv9_global_info; +extern nfv9_server_info_t *nfv9_server_info_pool; + +/* #define DEBUG_NF_SERVER_CONFIG 1 */ +static inline void nfv9_delete_server_info(cnat_nfv9_logging_info_t *nfv9_info) +{ + nfv9_server_info_t *server = nfv9_server_info_pool + + nfv9_info->server_index; + if(nfv9_info->server_index == EMPTY) { +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + PLATFORM_DEBUG_PRINT("Deleting empty server info\n"); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + return; + } + + /* Check if this server is not used by anyone.. 
if not delete */ + /* Caller of this function does not need it..so decrement ref count */ + server->ref_count--; + if(!(server->ref_count)) { +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + PLATFORM_DEBUG_PRINT("Deleting nfv9 server %x, %d at %d\n", + server->ipv4_address, + server->port, + nfv9_info->server_index); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + pool_put(nfv9_server_info_pool, server); + nfv9_info->server_index = EMPTY; + } +#ifdef DEBUG_NF_SERVER_CONFIG + else { + if(my_instance_number == 1) { + PLATFORM_DEBUG_PRINT("Not Deleting nfv9 server %x, %d rc %d\n", + server->ipv4_address, + server->port, + server->ref_count); + } + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + return; +} + +void handle_pending_nfv9_pkts(); +#endif /* __CNAT_LOGGING_H__ */ diff --git a/vnet/vnet/vcgn/cnat_pcp_server.h b/vnet/vnet/vcgn/cnat_pcp_server.h new file mode 100644 index 00000000000..c77c6a875f8 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_pcp_server.h @@ -0,0 +1,398 @@ +/* + *------------------------------------------------------------------ + * cnat_pcp_server.h + * + * Copyright (c) 2009-2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __CNAT_PCP_SERVER_H__ +#define __CNAT_PCP_SERVER_H__ + +#include "dslite_defs.h" + +/* Debug utils of PCP */ +#define PCP_DBG(debug, ...) 
\ + if(PREDICT_FALSE(cnat_pcp_debug_flag >= debug)) { \ + printf("%s:%s:%d - ", \ + __FILE__, __FUNCTION__, __LINE__);\ + printf(__VA_ARGS__);\ + printf("\n"); \ + } + +#define PCP_DUMP_PDATA \ + if(PREDICT_FALSE(cnat_pcp_debug_flag >= 100)) { \ + printf("%s:%s:%d - \n", \ + __FILE__, __FUNCTION__, __LINE__);\ + printf("src - ip = %X, proto = %d, port = %d i_vrf = %d, o_vrf = %d\n", \ + pcp_data.src_ip[3], pcp_data.proto, pcp_data.src_port, pcp_data.i_vrf, pcp_data.o_vrf); \ + printf(" third party ip = %X\n", pcp_data.third_party_ip[3]); \ + printf("map - ip = %X, port = %d \n", \ + pcp_data.ext_ip[3], pcp_data.ext_port);\ + printf("remote - ip = %X, port = %d \n", \ + pcp_data.peer_ip[3], pcp_data.peer_port); \ + printf("req life time = %d \n", pcp_data.req_lifetime); \ + printf("drop = %d \n", pcp_data.drop);\ + printf("udp_len = %d \n", pcp_data.udp_len); \ + printf("pm = %p \n", pcp_data.pm); \ + printf("cnat_proto = %X \n", pcp_data.cnat_proto); \ + printf("inst_id = %X \n", pcp_data.inst_id); \ + printf("======================================================\n"); \ + } + +#define PCP_DUMP_PACKET(ip, len) pcp_hex_dump(ip, len) + +#ifdef TOBE_PORTED +#define PCP_INCR(counter) pcp_counters.pcp_##counter++ ; +#else +#define PCP_INCR(counter) +#endif + +typedef struct pcp_debug_counters { + u64 pcp_input; + u64 pcp_output; + u64 pcp_service_nat44; + u64 pcp_service_dslite; + /* below all are drops */ + u64 pcp_drops; + u64 pcp_i2o_key_inuse; + u64 pcp_throttle_drops; + u64 pcp_udp_len; + u64 pcp_nrequest; + u64 pcp_min_udp_len; + u64 pcp_max_udp_len; + u64 pcp_mod4_len; + u64 pcp_invalid_3rd_len; + u64 pcp_invalid_option; + u64 pcp_version; + u64 pcp_invalid_opcode; + u64 pcp_invalid_client_ip; + u64 pcp_invalid_proto; + u64 pcp_invalid_port; + u64 pcp_invalid_vrfmap; + u64 pcp_invalid_ext_addr; + u64 pcp_out_addr_inuse; + u64 pcp_exact_match; + u64 pcp_exact_entry_created; + u64 pcp_exact_db_alloc_failed; + u64 pcp_udb_mismatch; + u64 pcp_noexact_db_allocated; + u64 pcp_static_entry_present; + u64 pcp_entry_deleted; + u64 pcp_3rd_party_option; + + /* map counters */ + u64 pcp_map_input; + u64 pcp_map_min_len; + u64 pcp_map_max_len; + u64 pcp_map_invalid_option; + u64 pcp_map_invalid_option_len; + u64 pcp_map_pref_fail_option; + u64 pcp_map_invalid_delete_req; + u64 pcp_map_delete_req; + u64 pcp_map_create_req; + u64 pcp_map_refresh; + + /* peer counters */ + u64 pcp_peer_input; + u64 pcp_peer_invalid_len; + u64 pcp_peer_delete_req; + u64 pcp_peer_create_req; + u64 pcp_peer_addr_mistmatch; + u64 pcp_peer_refresh; + +} pcp_debug_counters_t; + +typedef struct { + u16 msg_id; + u8 rc; + u8 pad[5]; + + /* better to have a group structures rather than individual + variables, any change in counters is will automatically + reflect here */ + pcp_debug_counters_t counters; +} pcp_show_counters_resp_t ; + + + +/* PCP opcodes */ +typedef enum pcp_opcode { + PCP_OPCODE_MAP = 1, + PCP_OPCODE_PEER = 2 +}pcp_opcode_t; + + +/* PCP opcodes */ +typedef enum pcp_options { + PCP_OPTION_3RD_PARTY = 1, + PCP_OPTION_PREF_FAIL = 2, + PCP_OPTION_FILTER = 3 +} pcp_options_t; + +/* PCP Result codes */ +typedef enum pcp_result_codes { + PCP_SUCCESS = 0, + PCP_ERR_UNSUPP_VERSION = 1, + PCP_ERR_NOT_AUTHORIZED = 2, + PCP_ERR_MALFORMED_REQUEST = 3, + PCP_ERR_UNSUPP_OPCODE = 4, + PCP_ERR_UNSUPP_OPTION = 5, + PCP_ERR_MALFORMED_OPTION = 6, + PCP_ERR_NETWORK_FAILURE = 7, + PCP_ERR_NO_RESOURCES = 8, + PCP_ERR_UNSUPP_PROTOCOL = 9, + PCP_ERR_USER_EX_QUOTA = 10, + PCP_ERR_CANNOT_PROVIDE_EXTERNAL = 11, + PCP_ERR_ADDRESS_MISMATCH 
= 12, + PCP_ERR_EXCESSIVE_REMOTE_PEERS = 13 +} pcp_result_codes_t; + +#define PCP_DISABLED 0 +#define PCP_ENABLED 1 + +#define PCP_DROP 1 + +#define PCP_STATIC_LIFETIME 0xFFFFFFFF +#define PCP_MAX_LIFETIME 0x00015180 /* 24 hours = 86400 seconds*/ + +#define PCP_VERSION_SUPPORTED 1 + +#define PCP_NO_PREF_FAIL_OPTION 0 +#define PCP_PREF_FAIL_OPTION 1 + +#define CNAT_DEF_PCP_PORT 5351 + +#define PCP_REQ_RESP_BIT 0x80 +#define PCP_RESPONSE(r_opcode) (r_opcode & PCP_REQ_RESP_BIT) +#define PCP_REQUEST(r_opcode) !(PCP_RESPONSE(r_opcode)) + +#define PCP_REQ_OPCODE(r_opcode) (r_opcode & 0x7F) + +/* 24 bytes */ +#define PCP_COMMON_HDR_LEN sizeof(pcp_request_t) + +/* 8 bytes */ +#define UDP_HDR_LEN sizeof(udp_hdr_type_t) + +#define PCP_PREF_FAIL_OPTION_SIZE \ + sizeof(pcp_prefer_fail_option_t) + +#define PCP_3RD_PARTY_OPTION_SIZE \ + sizeof(pcp_3rd_party_option_t) + +#define PCP_MIN_LEN PCP_COMMON_HDR_LEN + +/* 24+8=32 bytes */ +#define PCP_MIN_UDP_LEN (PCP_MIN_LEN + UDP_HDR_LEN) + +#define PCP_MAX_LEN 1024 + +/* 1024+8 = 1032 bytes */ +#define PCP_MAX_UDP_LEN (PCP_MAX_LEN + UDP_HDR_LEN) + +/* 24+ 24 = 48 bytes */ +#define PCP_MAP_OPCODE_MIN_LEN (PCP_COMMON_HDR_LEN + \ + sizeof( pcp_map_option_specific_data_t)) + +/* 24 + 44 = 68 bytes */ +#define PCP_PEER_OPCODE_MIN_LEN (PCP_COMMON_HDR_LEN + \ + sizeof( pcp_peer_option_specific_data_t)) + +/* 48 + 8 = 56 bytes */ +#define PCP_MAP_OPCODE_MIN_UDP_LEN (PCP_MAP_OPCODE_MIN_LEN + \ + UDP_HDR_LEN ) + +#define PCP_GET_MAP_OPTION_OFFSET(req) \ + ((u8*)req + PCP_MAP_OPCODE_MIN_LEN) + +#define PCP_GET_PEER_OPTION_OFFSET(req) \ + ((u8*)req + PCP_PEER_OPCODE_MIN_LEN) + + +#define PCP_REQ_TOTAL_LEN(udp) (udp->udp_length - \ + UDP_HDR_LEN) +/* 56 + 4 = 60 bytes */ +#define PCP_MAP_OPCODE_PREF_FAIL_OPTION_LEN \ + (PCP_MAP_OPCODE_MIN_UDP_LEN + \ + sizeof(pcp_prefer_fail_option_t)) + + +/* 68 + 8 = 76 bytes */ +#define PCP_PEER_OPCODE_MIN_UDP_LEN (PCP_PEER_OPCODE_MIN_LEN + \ + UDP_HDR_LEN) + +#define PCP_MUST_OPTION(option_code) (option_code & 0x80) + + + +/* 56 + 20 = 76*/ +#define PCP_DSLITE_MAP_OPCODE_MIN_UDP_LEN \ + ( PCP_MAP_OPCODE_MIN_UDP_LEN + \ + PCP_3RD_PARTY_OPTION_SIZE) + +/* 60 + 20 = 80 */ +#define PCP_DSLITE_MAP_OPCODE_MAX_UDP_LEN \ + ( PCP_MAP_OPCODE_PREF_FAIL_OPTION_LEN + \ + PCP_3RD_PARTY_OPTION_SIZE) + +/* 76 + 20 = 96 */ +#define PCP_DSLITE_PEER_OPCODE_MIN_UDP_LEN \ + ( PCP_PEER_OPCODE_MIN_UDP_LEN + \ + PCP_3RD_PARTY_OPTION_SIZE) + + +#define PCP_SET_CNAT_PROTO(proto) \ + pcp_data.cnat_proto = (proto == TCP_PROT) ? CNAT_TCP: \ + (proto == UDP_PROT)? CNAT_UDP : CNAT_ICMP; + +#define PCP_SET_REQ_LIFETIME() \ + if(pcp_data.db->flags & CNAT_DB_FLAG_STATIC_PORT) { \ + pcp_data.db->proto_data.seq_pcp.pcp_lifetime = \ + PCP_STATIC_LIFETIME; \ + pcp_data.req_lifetime = PCP_STATIC_LIFETIME; \ + } else { \ + pcp_data.db->proto_data.seq_pcp.pcp_lifetime = \ + pcp_data.req_lifetime + cnat_current_time ; \ + } + + +/* per second not more than PCP_THROTTLE_LIMIT + * delete requests will be handled. 
+ * this excludes , specific entries, in which + * only one entry needs to be deleted + */ +#define PCP_THROTTLE_LIMIT 2 + +typedef struct pcp_request { + u8 ver; + u8 r_opcode; + u16 reserved; + u32 req_lifetime; + u32 ip[4]; /* ipv4 will be represented + by the ipv4 mapped ipv6 */ +} pcp_request_t; + +typedef struct pcp_response { + u8 ver; + u8 r_opcode; + u8 reserved; + u8 result_code; + u32 lifetime; + u32 epochtime; + u32 reserved1[3]; +} pcp_response_t; + + +typedef struct pcp_options_hdr { + u8 code; + u8 reserved; + u16 len; + u8 data[0]; +} pcp_options_hdr_t; + + +/* same for both request and response */ +typedef struct pcp_map_option_specific_data { + u8 protocol; + u8 reserved[3]; + u16 int_port; + u16 ext_port; + u32 ext_ip[4]; /* ipv4 will be represnted + by the ipv4 mapped ipv6 */ +} pcp_map_option_specific_data_t; + +/* same for both request and response */ +typedef struct pcp_peer_option_specific_data { + u8 protocol; + u8 reserved[3]; + u16 int_port; + u16 ext_port; + u32 ext_ip[4]; /* ipv4 will be represented + by the ipv4 mapped ipv6 */ + u16 peer_port; + u16 reserved1; + u32 peer_ip[4]; +} pcp_peer_option_specific_data_t; + +typedef struct pcp_prefer_fail_option { + u8 option; + u8 reserved; + u16 len; +} pcp_prefer_fail_option_t; + + +typedef struct pcp_3rd_party_option{ + u8 option; + u8 reserved; + u16 len; + u32 ip[4]; +} pcp_3rd_party_option_t; + +/* structure used as pipeline data */ + +typedef struct pcp_pipeline_data { + + union { + + u8 *p; + ipv4_header *ip ; + ipv6_header_t *ipv6 ; + + } l3addr; + + udp_hdr_type_t *udp; + pcp_request_t *req; + pcp_response_t *resp; + pcp_opcode_t opcode; + u32 src_ip[4]; + u16 src_port; + u8 proto; + u16 i_vrf; + u16 o_vrf; + u32 ext_ip[4]; + u16 ext_port; + u32 third_party_ip[4]; + + /* valid for peer opcode */ + u32 peer_ip[4]; + u32 peer_port; + u32 req_lifetime; + u32 udp_len; + pcp_options_t pref_fail; + pcp_options_t third_party; + u8 *option_spec; + pcp_result_codes_t ret_code; + cnat_portmap_v2_t *pm; + cnat_main_db_entry_t *db; + cnat_vrfmap_t *vrfmap; + dslite_table_entry_t *inst_ptr; + u16 inst_id; + u32 flags; + u16 cnat_proto; + + /* is packet needs to be dropped ? */ + u8 drop; + /* nat44, dslite, nat64 */ +#define PCP_SERVICE_NAT44 1 +#define PCP_SERVICE_DSLITE 2 +#define PCP_SERVICE_NAT64 3 + u8 service_type; + +#define PCP_REQ_ENTRY_PRESENT 1 +#define PCP_REQ_EXT_MAP_PRESENT 1 + u8 state; +} pcp_pipeline_data_t; + +#endif /* __CNAT_PCP_sERVER_H__ */ diff --git a/vnet/vnet/vcgn/cnat_ports.c b/vnet/vnet/vcgn/cnat_ports.c new file mode 100644 index 00000000000..4437865aaee --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ports.c @@ -0,0 +1,1113 @@ +/* + *------------------------------------------------------------------ + * cnat_ports.c - port allocator + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/clib.h> +#include <vppinfra/bitmap.h> + +#include "cnat_db.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_logging.h" +#include "spp_timers.h" +#include "platform_common.h" +#include "cgn_bitmap.h" +#include "spp_platform_trace_log.h" +#include "cnat_ports.h" + +#if 1 /* TOBE_PORTED */ +/* Following is defined elsewhere. */ +#define msg_spp_err(s) \ +do { \ + fprintf(stderr,(i8 *)s); \ + fputs("\n", stderr); \ +} while(0); +#endif + + +#define PM_90_PERCENT_USE 58980 +/* + * instance number provisioned from HW + */ +u8 my_instance_number = 0; + +typedef struct { + u32 cached_next_index; + /* $$$$ add data here */ + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} cnat_ports_main_t; + +cnat_ports_main_t cnat_ports_main; + +static u32 rseed_port; /* random number generator seed */ + +void +cnat_db_dump_portmap_for_vrf (u32 vrfmap_index) +{ + u32 i, pm_len; + cnat_vrfmap_t *my_vrfmap = cnat_map_by_vrf + vrfmap_index; + cnat_portmap_v2_t *pm, *my_pm __attribute__((unused)); + + pm = my_vrfmap->portmap_list; + pm_len = vec_len(pm); + + for (i = 0; i < pm_len; i++) { + my_pm = pm + i; + + PLATFORM_DEBUG_PRINT("pm %d: IPv4 Addr 0x%x - in use %d private_ip_users_count %d\n", + i, my_pm->ipv4_address, my_pm->inuse, + my_pm->private_ip_users_count); + + PLATFORM_DEBUG_PRINT("pm %d: IPv4 Addr 0x%x - in use %d " + "private_ip_users_count %d\n", + i, my_pm->ipv4_address, my_pm->inuse, + my_pm->private_ip_users_count); + } +} + +void +cnat_db_dump_portmaps () +{ + u32 i, vrfmap_index; + + for (i = 0; i < CNAT_MAX_VRFMAP_ENTRIES; i++) { + vrfmap_index = vrf_map_array[i]; + + if (vrfmap_index == VRF_MAP_ENTRY_EMPTY) { + continue; + } + + PLATFORM_DEBUG_PRINT("\n\nDumping the port map for uidb_index %d\n", i); + cnat_db_dump_portmap_for_vrf(vrfmap_index); + } +} + +#ifndef NO_BULK_LOGGING +static int check_if_stat_alloc_ok_for_bulk(cnat_portmap_v2_t *pm, + u16 i_port, bulk_alloc_size_t bulk_size, + u16 static_port_range) +{ + uword bit_test_result; + if(BULK_ALLOC_SIZE_NONE == bulk_size) return 1; /* No issues */ + + if(i_port < static_port_range) return 1; /* we don't want bulk */ + + i_port = (i_port/bulk_size) * bulk_size; + bit_test_result = cgn_clib_bitmap_check_if_all(pm->bm, i_port, bulk_size); + return(bit_test_result); +} +#else /* dummy */ +inline static int check_if_stat_alloc_ok_for_bulk(cnat_portmap_v2_t *pm, + u16 i_port, bulk_alloc_size_t bulk_size, + u16 static_port_range) +{ + return 1; +} +#endif /* NO_BULK_LOGGING */ +/* + * cnat_port_alloc_static_v2 + * public ipv4 address/port allocator for Static Port commands + * tries to allocate same outside port as inside port + */ +cnat_errno_t +cnat_static_port_alloc_v2 ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 i_ipv4_address, + u16 i_port, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range +#ifndef NO_BULK_LOGGING + , bulk_alloc_size_t bulk_size, + int *nfv9_log_req +#endif + , u16 ip_n_to_1 + ) +{ + u32 i, hash_value, my_index, found, max_attempts; + u16 start_bit, new_port; + cnat_portmap_v2_t *my_pm = 0; + u32 pm_len = vec_len(pm); + uword bit_test_result; + +#ifndef NO_BULK_LOGGING + *nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; +#endif + + if (PREDICT_FALSE(pm_len == 0)) { + return 
(CNAT_NO_POOL_ANY); + } + + switch (atype) { + + case PORT_ALLOC_ANY: + + found = 0; + + /* + * Try to hash the IPv4 address to get an index value to select the pm + */ + hash_value = (i_ipv4_address & 0xffff) ^ + ((i_ipv4_address > 16) & 0xffff); + + /* + * If pm_len <= 256, compact the hash to 8 bits + */ + if (PREDICT_TRUE(pm_len <= 256)) { + hash_value = (hash_value & 0xff) ^ ((hash_value > 8) & 0xff); + } + + /* + * Ensure that the hash value is in the range 0 .. (pm_len-1) + */ + my_index = hash_value % pm_len; + + for (i = 0; i < PORT_PROBE_LIMIT; i++) { + my_pm = pm + my_index; + if(PREDICT_TRUE(ip_n_to_1)) { + if(PREDICT_TRUE(my_pm->private_ip_users_count < ip_n_to_1)) { + /* + * Try to find a PM with atlest 33% free and my_port free + */ + if (PREDICT_TRUE((my_pm->inuse < ((BITS_PER_INST*2)/3)) && + clib_bitmap_get_no_check(my_pm->bm, + i_port) == 1) +#ifndef NO_BULK_LOGGING + && check_if_stat_alloc_ok_for_bulk(my_pm, i_port, + bulk_size, + static_port_range) +#endif + ) { + found = 1; + break; + } + } + + } else { + /* + * Try to find a PM with atlest 33% free and my_port free + */ + if (PREDICT_TRUE((my_pm->inuse < ((BITS_PER_INST*2)/3)) && + clib_bitmap_get_no_check(my_pm->bm, + i_port) == 1) +#ifndef NO_BULK_LOGGING + && check_if_stat_alloc_ok_for_bulk(my_pm, i_port, + bulk_size, + static_port_range) +#endif + ) { + found = 1; + break; + } + } + my_index = (my_index + 1) % pm_len; + } + + /* + * If not found do it the hard way . + * "hard" way, best-fit. + */ + if (!found) { + u32 min_inuse_any, min_inuse_myport; + u32 min_index_any, min_index_myport; + + min_inuse_any = min_inuse_myport = PORTS_PER_ADDR + 1; + min_index_any = min_index_myport = ~0; + for (i = 0; i < pm_len; i++) { + my_pm = pm + i; + if(PREDICT_TRUE(ip_n_to_1)) { + if(PREDICT_TRUE(my_pm->private_ip_users_count < ip_n_to_1)) { + if (PREDICT_FALSE(my_pm->inuse < min_inuse_any)) { + min_inuse_any = my_pm->inuse; + min_index_any = my_pm - pm; + } + if (PREDICT_FALSE(my_pm->inuse < min_inuse_myport)) { + if (PREDICT_TRUE(clib_bitmap_get_no_check( + my_pm->bm,i_port) == 1) +#ifndef NO_BULK_LOGGING + && check_if_stat_alloc_ok_for_bulk(my_pm, + i_port,bulk_size,static_port_range) +#endif + ) { + min_inuse_myport = my_pm->inuse; + min_index_myport = my_pm - pm; + } + } + + } + + } else { + if (PREDICT_FALSE(my_pm->inuse < min_inuse_any)) { + min_inuse_any = my_pm->inuse; + min_index_any = my_pm - pm; + } + if (PREDICT_FALSE(my_pm->inuse < min_inuse_myport)) { + if (PREDICT_TRUE(clib_bitmap_get_no_check( + my_pm->bm, i_port) == 1) +#ifndef NO_BULK_LOGGING + && check_if_stat_alloc_ok_for_bulk(my_pm, i_port, + bulk_size, static_port_range) +#endif + ) { + min_inuse_myport = my_pm->inuse; + min_index_myport = my_pm - pm; + } + } + } + } + + /* + * Check if we have an exactly matching PM that has + * myport free. If so use it. 
If no such PM is + * available, use any PM + */ + if (PREDICT_TRUE(min_inuse_myport < PORTS_PER_ADDR)) { + my_pm = pm + min_index_myport; + my_index = min_index_myport; + found = 1; + } else if (PREDICT_TRUE(min_inuse_any < PORTS_PER_ADDR)) { + my_pm = pm + min_index_any; + my_index = min_index_any; + found = 1; + } + } + + if (!found) { + return (CNAT_NO_PORT_ANY); + } + break; + + case PORT_ALLOC_DIRECTED: + my_index = *index; + if (PREDICT_FALSE(my_index > pm_len)) { + return (CNAT_INV_PORT_DIRECT); + } + my_pm = pm + my_index; + break; + + default: + return (CNAT_ERR_PARSER); + } + + /* Allocate a matching port if possible */ + start_bit = i_port; + found = 0; + max_attempts = BITS_PER_INST; +#ifndef NO_BULK_LOGGING + if((BULK_ALLOC_SIZE_NONE != bulk_size) && + (i_port >= static_port_range)) { + start_bit = (start_bit/bulk_size) * bulk_size; + max_attempts = BITS_PER_INST/bulk_size; + } +#endif /* NO_BULK_LOGGING */ + + for (i = 0; i < max_attempts; i++) { +#ifndef NO_BULK_LOGGING + if((BULK_ALLOC_SIZE_NONE != bulk_size) && + (i_port >= static_port_range)) { + bit_test_result = cgn_clib_bitmap_check_if_all(my_pm->bm, + start_bit, bulk_size); + } + else +#endif /* #ifndef NO_BULK_LOGGING */ + bit_test_result = clib_bitmap_get_no_check(my_pm->bm, start_bit); + + if (PREDICT_TRUE(bit_test_result)) { +#ifndef NO_BULK_LOGGING + if((BULK_ALLOC_SIZE_NONE != bulk_size) && + (i_port >= static_port_range)) { + *nfv9_log_req = start_bit; + if(i==0) new_port = i_port; /* First go */ + else { + new_port = bit2port(start_bit); + if (pair_type == PORT_S_ODD && (new_port & 0x1) == 0) + new_port++; + } + found = 1; + break; + } + else { +#endif /* NO_BULK_LOGGING */ + new_port = bit2port(start_bit); + if (pair_type == PORT_S_ODD) { + if ((new_port & 0x1) == 1) { + found = 1; + break; + } + } else if (pair_type == PORT_S_EVEN) { + if ((new_port & 0x1) == 0) { + found = 1; + break; + } + } else { + found = 1; + break; + } +#ifndef NO_BULK_LOGGING + } +#endif + } +#ifndef NO_BULK_LOGGING + if((BULK_ALLOC_SIZE_NONE != bulk_size) && + (i_port >= static_port_range)) + start_bit = (start_bit + bulk_size) % BITS_PER_INST; + else { +#endif /* NO_BULK_LOGGING */ + start_bit = (start_bit + 1) % BITS_PER_INST; + if(PREDICT_FALSE(start_bit == 0)) { + start_bit = 1; /* Port 0 is invalid, so start from 1 */ + } +#ifndef NO_BULK_LOGGING + } +#endif + } /* End of for loop */ + + if (!found) { + /* Port allocation failure */ + if (atype == PORT_ALLOC_DIRECTED) { + return (CNAT_NOT_FOUND_DIRECT); + } else { + return (CNAT_NOT_FOUND_ANY); + } + } + + /* Accounting */ + cgn_clib_bitmap_clear_no_check(my_pm->bm, new_port); + (my_pm->inuse)++; + + *index = my_pm - pm; + *o_ipv4_address = my_pm->ipv4_address; + + *o_port = new_port; + + return (CNAT_SUCCESS); +} + +/* + * Try to allocate a portmap structure based on atype field + */ +cnat_portmap_v2_t * +cnat_dynamic_addr_alloc_from_pm ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + u32 *index, + cnat_errno_t *err, + u16 ip_n_to_1, + u32 *rseed_ip) +{ + u32 i, pm_len; + int my_index; + int min_inuse, min_index; + + cnat_portmap_v2_t *my_pm = 0; + *err = CNAT_NO_POOL_ANY; + + pm_len = vec_len(pm); + + switch(atype) { + case PORT_ALLOC_ANY: + if (PREDICT_FALSE(pm_len == 0)) { + my_pm = 0; + *err = CNAT_NO_POOL_ANY; + goto done; + } + + /* "Easy" way, first address with at least 200 free ports */ + for (i = 0; i < PORT_PROBE_LIMIT; i++) { + *rseed_ip = randq1(*rseed_ip); + my_index = (*rseed_ip) % pm_len; + my_pm = pm + my_index; + if (PREDICT_FALSE(ip_n_to_1)) { + 
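+                /*
+                 * ip_n_to_1 caps how many private subscribers may share this
+                 * public address: a value of 1 means the portmap must be
+                 * completely unused before it can be picked, while larger
+                 * values only require the subscriber-count and port-usage
+                 * checks below to pass.
+                 */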
if(PREDICT_TRUE(ip_n_to_1 == 1)) { + if (PREDICT_FALSE(0 == my_pm->inuse)) { + goto done; + } + } else { + if(PREDICT_TRUE(my_pm->private_ip_users_count < ip_n_to_1)) { + if (PREDICT_FALSE(my_pm->inuse < ((BITS_PER_INST*2)/3))) { + goto done; + } + } + } + } else { + if (PREDICT_FALSE(my_pm->inuse < ((BITS_PER_INST*2)/3))) { + goto done; + } + } + } + + /* "hard" way, best-fit. $$$$ Throttle complaint */ + min_inuse = PORTS_PER_ADDR + 1; + min_index = ~0; + for (i = 0; i < pm_len; i++) { + my_pm = pm + i; + if (PREDICT_FALSE(ip_n_to_1)) { + if(PREDICT_TRUE(ip_n_to_1 == 1)) { + if (PREDICT_FALSE(!my_pm->inuse)) { + min_inuse = my_pm->inuse; + min_index = my_pm - pm; + } + } else { + if(PREDICT_TRUE(my_pm->private_ip_users_count < ip_n_to_1)) { + if (PREDICT_TRUE(my_pm->inuse < min_inuse)) { + min_inuse = my_pm->inuse; + min_index = my_pm - pm; + } + + } + } + + } else { + if (PREDICT_TRUE(my_pm->inuse < min_inuse)) { + min_inuse = my_pm->inuse; + min_index = my_pm - pm; + } + } + } + + if (PREDICT_TRUE(min_inuse < PORTS_PER_ADDR)) { + my_pm = pm + min_index; + my_index = min_index; + goto done; + } + + /* Completely out of ports */ +#ifdef DEBUG_PRINTF_ENABLED + PLATFORM_DEBUG_PRINT("%s out of ports\n", __FUNCTION__); +#endif + + my_pm = 0; + *err = CNAT_NO_PORT_ANY; + break; + + + case PORT_ALLOC_DIRECTED: + //ASSERT(*index < pm_len); + if (PREDICT_FALSE(*index > pm_len)) { + my_pm = 0; + *err = CNAT_INV_PORT_DIRECT; + goto done; + } + my_pm = pm + *index; + my_index = *index; + break; + + default: + msg_spp_err("bad allocation type in cnat_port_alloc"); + my_pm = 0; + *err = CNAT_ERR_PARSER; + break; + } + + done: + if (PREDICT_FALSE(my_pm == NULL)) { + return (my_pm); + } + + if (PREDICT_FALSE(my_pm->inuse >= BITS_PER_INST)) { + my_pm = 0; + if (atype == PORT_ALLOC_DIRECTED) { + *err = CNAT_BAD_INUSE_DIRECT; + } else { + *err = CNAT_BAD_INUSE_ANY; + } + } + + return (my_pm); +} + + +/* + * cnat_port_alloc_v2 + * public ipv4 address/port allocator for dynamic ports + * + * 200K users / 20M translations means vec_len(cnat_portmap) will be + * around 300. 
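+ *
+ * Hypothetical caller sketch (local names are illustrative only; assumes
+ * NO_BULK_LOGGING is left undefined so the bulk arguments are present,
+ * and that the caller already holds a cnat_vrfmap_t *my_vrfmap):
+ *
+ *   u32 idx; u32 out_ip; u16 out_port; int nfv9_req; u32 seed = 1;
+ *   cnat_errno_t rv = cnat_dynamic_port_alloc_v2 (
+ *       my_vrfmap->portmap_list, PORT_ALLOC_ANY, PORT_SINGLE,
+ *       &idx, &out_ip, &out_port, cnat_static_port_range,
+ *       BULKSIZE_FROM_VRFMAP(my_vrfmap), &nfv9_req,
+ *       my_vrfmap->ip_n_to_1, &seed);
+ *   if (rv == CNAT_SUCCESS) { /* use out_ip / out_port */ }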
+ * + */ +cnat_errno_t +cnat_dynamic_port_alloc_v2 ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range +#ifndef NO_BULK_LOGGING + , bulk_alloc_size_t bulk_size, + int *nfv9_log_req +#endif + , u16 ip_n_to_1, + u32 *rseed_ip + ) +{ + int i; + cnat_errno_t my_err = CNAT_NO_POOL_ANY; + cnat_portmap_v2_t *my_pm = 0; + u16 start_bit; + u16 new_port; + uword bit_test_result; + uword max_trys_to_find_port; + + ASSERT(index); + ASSERT(o_ipv4_address); + ASSERT(o_port); + + my_pm = cnat_dynamic_addr_alloc_from_pm(pm, atype, index, &my_err, ip_n_to_1, + rseed_ip); + + if (PREDICT_FALSE(my_pm == NULL)) { + return (my_err); + } + if(PREDICT_FALSE(my_pm->dyn_full == 1)) { + if (atype == PORT_ALLOC_DIRECTED) { + return (CNAT_NOT_FOUND_DIRECT); + } else { + return (CNAT_NOT_FOUND_ANY); + } + } + +#if DEBUG > 1 + PLATFORM_DEBUG_PRINT("ALLOC_PORT_V2: My_Instance_Number %d: IP addr 0x%x, Inuse %d\n", + my_instance_number, my_pm->ipv4_address, my_pm->inuse); +#endif + + rseed_port = randq1(rseed_port); + + /* + * Exclude the static port range for allocating dynamic ports + */ + start_bit = (rseed_port) % (BITS_PER_INST - static_port_range); + start_bit = start_bit + static_port_range; + +#ifndef NO_BULK_LOGGING + *nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; + if(BULK_ALLOC_SIZE_NONE != bulk_size) + { + /* We need the start port of the range to be alined on integer multiple + * of bulk_size */ + max_trys_to_find_port = BITS_PER_INST/bulk_size; + start_bit= ((start_bit + bulk_size -1)/bulk_size) * bulk_size; + } + else +#endif /* #ifndef NO_BULK_LOGGING */ + max_trys_to_find_port = BITS_PER_INST; + + /* Allocate a random port / port-pair */ + for (i = 0; i < max_trys_to_find_port; i++) { + + /* start_bit is only a u16.. so it can rollover and become zero */ + if (PREDICT_FALSE((start_bit >= BITS_PER_INST) || + (start_bit < static_port_range))) { + start_bit = static_port_range; +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) { + start_bit= ((start_bit + bulk_size -1)/bulk_size) * bulk_size; + } +#endif /* #ifndef NO_BULK_LOGGING */ + } + /* Scan forward from random position */ +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) { + bit_test_result = cgn_clib_bitmap_check_if_all(my_pm->bm, + start_bit, bulk_size); + } + else +#endif /* #ifndef NO_BULK_LOGGING */ + bit_test_result = clib_bitmap_get_no_check(my_pm->bm, start_bit); + + if (PREDICT_TRUE(bit_test_result)) { + new_port = bit2port(start_bit); +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) + *nfv9_log_req = new_port; +#endif + if ((pair_type == PORT_S_ODD) && + (!(new_port & 0x1))) { +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) { + start_bit++; /* Just use the next one in the bulk range */ + new_port++; + goto found2; + } +#endif /* #ifndef NO_BULK_LOGGING */ + goto notfound; + } else if ((pair_type == PORT_S_EVEN) && + (new_port & 0x1)) { + goto notfound; + } + + /* OK we got one or two suitable ports */ + goto found2; + } + + notfound: +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) + start_bit += bulk_size; + else +#endif /* #ifndef NO_BULK_LOGGING */ + start_bit++; + + } /* end of for loop */ + + /* Completely out of ports */ + + /* Port allocation failure */ + /* set dyn_full flag. 
This would be used to verify + * for further dyn session before searching for port + */ + if (atype == PORT_ALLOC_DIRECTED) { + my_pm->dyn_full = 1; + return (CNAT_NOT_FOUND_DIRECT); + } else { + my_pm->dyn_full = 1; + return (CNAT_NOT_FOUND_ANY); + } + + + found2: + + /* Accounting */ + cgn_clib_bitmap_clear_no_check (my_pm->bm, start_bit); + (my_pm->inuse)++; + + *index = my_pm - pm; + *o_ipv4_address = my_pm->ipv4_address; + + *o_port = new_port; + return (CNAT_SUCCESS); +} + +#ifdef TOBE_PORTED +/* + * cnat_alloc_port_from_pm + * Given a portmap structure find port/port_pair that are free + * + * The assumption in this function is that bit in bm corresponds + * to a port number. This is TRUE and hence there is no call + * to the function bit2port here, though it is done in other + * places in this file. + * + */ +static u32 +cnat_alloc_port_from_pm ( + u32 start_port, + u32 end_port, + cnat_portmap_v2_t *my_pm, + port_pair_t pair_type +#ifndef NO_BULK_LOGGING + , bulk_alloc_size_t bulk_size, + int *nfv9_log_req +#endif /* #ifnded NO_BULK_ALLOCATION */ + ) +{ + u32 i; + u32 start_bit; + u32 total_ports = end_port - start_port + 1; + uword bit_test_result; + uword max_trys_to_find_port; + + rseed_port = randq1(rseed_port); + + start_bit = rseed_port % total_ports; + start_bit = start_bit + start_port; +#ifndef NO_BULK_LOGGING + *nfv9_log_req = BULK_ALLOC_NOT_ATTEMPTED; + if(BULK_ALLOC_SIZE_NONE != bulk_size) + { + /* We need the start port of the range to be alined on integer multiple + * of bulk_size */ + max_trys_to_find_port = total_ports/bulk_size; + start_bit= ((start_bit + bulk_size -1)/bulk_size) * bulk_size; + } + else +#endif /* #ifndef NO_BULK_LOGGING */ + max_trys_to_find_port = total_ports; + + /* Allocate a random port / port-pair */ + for (i = 0; i < max_trys_to_find_port; i++) { + /* start_bit is only a u16.. so it can rollover and become zero */ + if (PREDICT_FALSE((start_bit >= end_port) || + (start_bit < start_port))) { + start_bit = start_port; +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) { + start_bit= ((start_bit + bulk_size -1)/bulk_size) * bulk_size; + } +#endif /* #ifndef NO_BULK_LOGGING */ + } + + /* Scan forward from random position */ +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) { + bit_test_result = cgn_clib_bitmap_check_if_all(my_pm->bm, + start_bit, bulk_size); + } + else +#endif /* #ifndef NO_BULK_LOGGING */ + bit_test_result = clib_bitmap_get_no_check(my_pm->bm, start_bit); + + if (PREDICT_TRUE(bit_test_result)) { +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) { + /* Got the entire bulk range */ + *nfv9_log_req = bit2port(start_bit); + return start_bit; + } else { +#endif /* #ifndef NO_BULK_LOGGING */ + /* + * For PORT_PAIR, first port has to be Even + * subsequent port <= end_port + * subsequent port should be unallocated + */ + if ((start_bit & 0x1) || + ((start_bit + 1) > end_port) || + (clib_bitmap_get_no_check(my_pm->bm, + (start_bit + 1)) == 0)) { + goto notfound; + } + return (start_bit); +#ifndef NO_BULK_LOGGING + } +#endif /* #ifndef NO_BULK_LOGGING */ + } /* if( free port found ) */ + +notfound: +#ifndef NO_BULK_LOGGING + if(BULK_ALLOC_SIZE_NONE != bulk_size) { + start_bit += bulk_size; + } else +#endif /* #ifndef NO_BULK_LOGGING */ + start_bit++; + + } + return (BITS_PER_INST); +} + +/* + * cnat_dynamic_port_alloc_rtsp + * public ipv4 address/port allocator for dynamic ports + * + * 200K users / 20M translations means vec_len(cnat_portmap) will be + * around 300. 
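+ *
+ * Unlike cnat_dynamic_port_alloc_v2, this variant only considers ports in
+ * [start_range, end_range]; for PORT_PAIR requests it needs an even port
+ * whose odd successor is also free, and then marks both ports in use.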
+ * + */ + +cnat_errno_t +cnat_dynamic_port_alloc_rtsp ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u16 start_range, + u16 end_range, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port +#ifndef NO_BULK_LOGGING + , bulk_alloc_size_t bulk_size, + int *nfv9_log_req +#endif + , u32 *rseed_ip + ) +{ + + u32 current_timestamp; + cnat_errno_t my_err = CNAT_NO_POOL_ANY; + cnat_portmap_v2_t *my_pm = 0; + u32 alloc_bit; + + ASSERT(index); + ASSERT(o_ipv4_address); + ASSERT(o_port); + + my_pm = cnat_dynamic_addr_alloc_from_pm(pm, atype, index, &my_err, 0,rseed_ip); + + if (PREDICT_FALSE(my_pm == NULL)) { + return (my_err); + } + +#if DEBUG > 1 + PLATFORM_DEBUG_PRINT("ALLOC_PORT_V2: My_Instance_Number %d: IP addr 0x%x, Inuse %d\n", + my_instance_number, my_pm->ipv4_address, my_pm->inuse); +#endif + + alloc_bit = + cnat_alloc_port_from_pm(start_range, end_range, my_pm, pair_type +#ifndef NO_BULK_LOGGING + , bulk_size, nfv9_log_req +#endif /* #ifndef NO_BULK_LOGGING */ + ); + + if (alloc_bit < BITS_PER_INST) { + if (pair_type == PORT_PAIR) { + /* Accounting */ + cgn_clib_bitmap_clear_no_check (my_pm->bm, alloc_bit); + cgn_clib_bitmap_clear_no_check (my_pm->bm, alloc_bit+1); + (my_pm->inuse) += 2; + } else { + /* Accounting */ + cgn_clib_bitmap_clear_no_check (my_pm->bm, alloc_bit); + (my_pm->inuse)++; + } + + *index = my_pm - pm; + *o_ipv4_address = my_pm->ipv4_address; + + *o_port = bit2port(alloc_bit);; + + return (CNAT_SUCCESS); + } + + /* Completely out of ports */ + current_timestamp = spp_trace_log_get_unix_time_in_seconds(); + if (PREDICT_FALSE((current_timestamp - my_pm->last_sent_timestamp) > + 1000)) { + spp_printf(CNAT_NO_EXT_PORT_AVAILABLE, 0, NULL); + my_pm->last_sent_timestamp = current_timestamp; + } + + + /* Port allocation failure */ + if (atype == PORT_ALLOC_DIRECTED) { + return (CNAT_NOT_FOUND_DIRECT); + } else { + return (CNAT_NOT_FOUND_ANY); + } +} +#else +cnat_errno_t +cnat_dynamic_port_alloc_rtsp ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u16 start_range, + u16 end_range, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port +#ifndef NO_BULK_LOGGING + , bulk_alloc_size_t bulk_size, + int *nfv9_log_req +#endif + , u32 *rseed_ip + ) +{ + return (CNAT_NOT_FOUND_ANY); +} +#endif + + +/* + * cnat_mapped_static_port_alloc_v2 + * / + */ +cnat_errno_t +cnat_mapped_static_port_alloc_v2 ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + u32 *index, + u32 ipv4_address, + u16 port +#ifndef NO_BULK_LOGGING + , int *nfv9_log_req, + bulk_alloc_size_t bulk_size +#endif + , u16 ip_n_to_1 + ) +{ + int i; + u32 pm_len; + u16 bm_bit; + cnat_portmap_v2_t *my_pm = 0; + u32 my_index; + + ASSERT(index); + + /* + * Map the port to the bit in the pm bitmap structure. 
+ * Note that we use ports from 1024..65535, so + * port number x corresponds to (x-1024) position in bitmap + */ + bm_bit = port2bit(port); + + pm_len = vec_len(pm); + + switch(atype) { + case PORT_ALLOC_ANY: + if (PREDICT_FALSE(pm_len == 0)) { + return (CNAT_NO_POOL_ANY); + } + + /* + * Find the pm that is allocated for this translated IP address + */ + my_index = pm_len; + + for (i = 0; i < pm_len; i++) { + my_pm = pm + i; + if (PREDICT_FALSE(my_pm->ipv4_address == ipv4_address)) { + my_index = i; + break; + } + } + + if ((PREDICT_FALSE(my_index >= pm_len)) || + ((PREDICT_FALSE(ip_n_to_1)) && (PREDICT_TRUE(my_pm->private_ip_users_count >= ip_n_to_1)))) { + return (CNAT_NO_POOL_ANY); + } + + break; + + case PORT_ALLOC_DIRECTED: + if (PREDICT_FALSE(*index > pm_len)) { + return (CNAT_INV_PORT_DIRECT); + } + + my_index = *index; + my_pm = pm + my_index; + if (PREDICT_FALSE(my_pm->ipv4_address != ipv4_address)) { + if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) { + PLATFORM_DEBUG_PRINT("Delete all main db entry for that particular in ipv4 address\n"); + } + return (CNAT_INV_PORT_DIRECT); + } + + break; + + default: + msg_spp_err("bad allocation type in cnat_port_alloc"); + return (CNAT_ERR_PARSER); + } + + + if (PREDICT_FALSE(my_pm == NULL)) { + return (CNAT_NO_POOL_ANY); + } + + /* + * Check if the port is already allocated to some other mapping + */ + if (PREDICT_FALSE(clib_bitmap_get_no_check (my_pm->bm, bm_bit) == 0)) { + return (CNAT_NO_POOL_ANY); + } + +#if DEBUG > 1 + PLATFORM_DEBUG_PRINT("ALLOC_PORT_V2: My_Instance_Number %d: IP addr 0x%x, Inuse %d\n", + my_instance_number, my_pm->ipv4_address, my_pm->inuse); +#endif + + /* + * Indicate that the port is already allocated + */ + cgn_clib_bitmap_clear_no_check (my_pm->bm, bm_bit); + (my_pm->inuse)++; + + *index = my_index; + + return (CNAT_SUCCESS); +} + +void cnat_port_free_v2 ( + cnat_portmap_v2_t *pm, + int index, + port_pair_t pair_type, + u16 base_port, + u16 static_port_range) +{ + cnat_portmap_v2_t *my_pm; + uword bit; + + /* check for valid portmap */ + if (PREDICT_FALSE(index > vec_len(pm))) { + spp_printf(CNAT_INVALID_INDEX_TO_FREE_PORT, 0, 0); + return; + } + + my_pm = pm + index; + bit = port2bit(base_port); + +#if DEBUG > 0 + if(clib_bitmap_get_no_check(my_pm->bm, bit)) + ASSERT(clib_bitmap_get_no_check(my_pm->bm, bit) == 0); +#endif + + cgn_clib_bitmap_set_no_check(my_pm->bm, bit); + + my_pm->inuse -= 1; + if(base_port >= static_port_range) { + /* Clear the full flag. 
we can have a new dynamic session now */ + my_pm->dyn_full = 0; + } + + return; +} + +void cnat_portmap_dump_v2 (cnat_portmap_v2_t *pm, u16 print_limit) +{ + int i; + u32 inuse =0; + + ASSERT(pm); + + for (i = 0; i < BITS_PER_INST; i++) { + if (PREDICT_FALSE(clib_bitmap_get_no_check (pm->bm, i) == 0)) { + if (PREDICT_TRUE(inuse++ < print_limit)) + PLATFORM_DEBUG_PRINT(" %d", bit2port(i)); + } + } + if (PREDICT_FALSE(inuse >= print_limit)) { + PLATFORM_DEBUG_PRINT("%d printed, print limit is %d\n", + inuse, print_limit); + } + PLATFORM_DEBUG_PRINT("\n"); +} + + +/* + * cnat_ports_init + */ +clib_error_t *cnat_ports_init(vlib_main_t *vm) +{ + cnat_ports_main_t *mp = &cnat_ports_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + /* suppress crypto-random port numbering */ +#ifdef SOON + if (spp_get_int_prop("no_crypto_random_ports") == 0) + crypto_random32(&seed); +#endif + + return 0; +} + +VLIB_INIT_FUNCTION(cnat_ports_init); + diff --git a/vnet/vnet/vcgn/cnat_ports.h b/vnet/vnet/vcgn/cnat_ports.h new file mode 100644 index 00000000000..bc1fb0d24a8 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_ports.h @@ -0,0 +1,208 @@ +/* + *------------------------------------------------------------------ + * cnat_ports.h - port database definitions + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __CNAT_PORTS_H__ +#define __CNAT_PORTS_H__ + +#include "platform_common.h" +#include "cnat_bulk_port_defs.h" + +#define PORTS_PER_ADDR 65536 + +#define CNAT_INSTS PLATFORM_CNAT_INSTS + +#define BITS_PER_INST (PORTS_PER_ADDR) + +/* + * Ensure that atleast few 4 bit ports are available for RTSP + * in case we want to map 4 digit inside ports to 4 digit outside ports + */ +#define MIN_STATIC_PORT_RANGE_FOR_RTSP (9900) + +extern u8 my_instance_number; + +/* + * Now it is a 1-to-1 mapping between bit and port values + */ +static inline u16 bit2port (u32 bit) +{ + return bit; +} + +static inline uword port2bit (u16 port) +{ + return port; +} + +/* + * Port bitmap structure + * THIS structure is not used to be REMOVED.... 
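+ * (cnat_portmap_v2_t, defined further below, is the structure the
+ * allocators in cnat_ports.c actually operate on.)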
+ */ + + +typedef struct { + u32 ipv4_address; /* native bit order */ + u16 vrf; + u16 pad; + u32 threshold_crossed; + uword bm[(BITS_PER_INST + BITS(uword)-1)/BITS(uword)]; +} cnat_portmap_t; + +//cnat_portmap_t *cnat_portmap; + + +typedef struct { + u32 inuse; + u32 delete_time; + u32 ipv4_address; /* native bit order */ + u32 last_sent_timestamp; + uword bm[(BITS_PER_INST + BITS(uword)-1)/BITS(uword)]; + u32 dyn_full; + u32 private_ip_users_count; /* number of private ip's(subscribers) to this + public ip */ +} cnat_portmap_v2_t; + + +typedef enum { + PORT_SINGLE=0, + PORT_PAIR=1, + PORT_S_EVEN=2, + PORT_S_ODD=3, +} port_pair_t; + +typedef enum { + PORT_TYPE_DYNAMIC=0, + PORT_TYPE_STATIC=1, + PORT_TYPE_RTSP=2, +} port_type_t; + + +typedef enum { + PORT_ALLOC_ANY=1, + PORT_ALLOC_DIRECTED=2, +} port_alloc_t; + +#define PORT_PROBE_LIMIT 20 + + +/* + * randq1 + * Linear congruential random number generator with + * extensively studied properties. See Numerical Recipes in C + * 2nd Ed. page 284. Known to behave according to the test vector + * supplied in the text, on X86 and Octeon. + */ +static inline u32 randq1 (u32 prev) +{ + return (1664525L*prev + 1013904223L); +} + +cnat_errno_t +cnat_static_port_alloc_v2( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 i_ipv4_address, + u16 i_port, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range +#ifndef NO_BULK_LOGGING + , bulk_alloc_size_t bulk_size, + int *nfv9_log_req +#endif /* NO_BULK_LOGGING */ + , u16 ip_n_to_1 + ); + +cnat_errno_t +cnat_mapped_static_port_alloc_v2 ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + u32 *index, + u32 ipv4_address, + u16 port +#ifndef NO_BULK_LOGGING + , int *nfv9_log_req, + bulk_alloc_size_t bulk_size +#endif + , u16 ip_n_to_1 + ); + +cnat_errno_t +cnat_dynamic_port_alloc_v2( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port, + u16 static_port_range +#ifndef NO_BULK_LOGGING + , bulk_alloc_size_t bulk_size, + int *nfv9_log_req +#endif + , u16 ip_n_to_1, + u32 *rseed_ip + ); + + +cnat_errno_t +cnat_dynamic_port_alloc_rtsp ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u16 start_range, + u16 end_range, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port +#ifndef NO_BULK_LOGGING + , bulk_alloc_size_t bulk_size, + int *nfv9_log_req +#endif + , u32 *rseed_ip + ); + +void cnat_port_free_v2( + cnat_portmap_v2_t *pm, + int index, + port_pair_t ptype, + u16 base_port, + u16 static_port_range); + +void cnat_portmap_dump_v2(cnat_portmap_v2_t *pm, + u16 print_limit); + + + +cnat_errno_t +nat64_static_port_alloc ( + cnat_portmap_v2_t *pm, + port_alloc_t atype, + port_pair_t pair_type, + u32 *i_ipv6_address, + u16 i_port, + u32 *index, + u32 *o_ipv4_address, + u16 *o_port); + + + +#endif /* __CNAT_PORTS_H__ */ diff --git a/vnet/vnet/vcgn/cnat_show.c b/vnet/vnet/vcgn/cnat_show.c new file mode 100644 index 00000000000..70476193797 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_show.c @@ -0,0 +1,807 @@ +/* + *------------------------------------------------------------------ + * cnat_show.c - translation database definitions + * + * Copyright (c) 2007-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/clib.h> + +#include "cnat_db.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_logging.h" +#include "spp_ctx.h" +#include "spp_timers.h" +#include "platform_common.h" +#include "cnat_syslog.h" +#include "cnat_v4_pptp_alg.h" +#include "platform_common.h" + +#ifndef TOBE_PORTED +/* The following variable is in cnat_config_msg_handler.c which + * is to be ported later.. if required + */ +u32 total_address_pool_allocated = 0; +#endif + +#ifndef NO_BULK_LOGGING +#define CNAT_MY_VRFMAP_PRINT \ +PLATFORM_DEBUG_PRINT("i-uidx 0x%x o-uidx 0x%x i-vrfid 0x%x o-vrfid 0x%x\n" \ + "status %d del time 0x%x tcp mss 0x%x pm list 0x%x\n" \ + "bulk size %d\n" \ + "ip n:1 %d\n" \ + "NFV9 template index 0x%x\n" \ + "SYSLOG template index 0x%x\n" \ + "Netflow Session Logging %d \n" \ + "Syslog Session Logging %d \n" \ + "PCP Server 0x%x, Port %u \n", \ + my_vrfmap->i_vrf, my_vrfmap->o_vrf, my_vrfmap->i_vrf_id, \ + my_vrfmap->o_vrf_id, my_vrfmap->status, my_vrfmap->delete_time, \ + my_vrfmap->tcp_mss, my_vrfmap->portmap_list, \ + BULKSIZE_FROM_VRFMAP(my_vrfmap), \ + my_vrfmap->ip_n_to_1, \ + my_vrfmap->nfv9_logging_index, \ + my_vrfmap->syslog_logging_index,\ + my_vrfmap->nf_logging_policy, \ + my_vrfmap->syslog_logging_policy, \ + my_vrfmap->pcp_server_addr, \ + my_vrfmap->pcp_server_port); +#else +#define CNAT_MY_VRFMAP_PRINT \ +PLATFORM_DEBUG_PRINT("i-uidx 0x%x o-uidx 0x%x i-vrfid 0x%x o-vrfid 0x%x\n" \ + "status %d del time 0x%x tcp mss 0x%x pm list 0x%x\n" \ + "NFV9 template index 0x%x\n ip n:1 %d\n", \ + my_vrfmap->i_vrf, my_vrfmap->o_vrf, my_vrfmap->i_vrf_id, \ + my_vrfmap->o_vrf_id, my_vrfmap->status, my_vrfmap->delete_time, \ + my_vrfmap->tcp_mss, my_vrfmap->portmap_list, \ + my_vrfmap->nfv9_logging_index, my_vrfmap->ip_n_to_1); +#endif /* NO_BULK_LOGGING */ + +#define CNAT_MY_LOGGING_INFO_PRINT \ +do { \ + cnat_syslog_logging_info_t *my_syslog_info = 0; \ + PLATFORM_DEBUG_PRINT("SYSLOG config: \n"); \ + pool_foreach (my_syslog_info, cnat_syslog_logging_info_pool, ({ \ + if (my_syslog_info->i_vrf == my_vrfmap->i_vrf) { \ + PLATFORM_DEBUG_PRINT(" \ + ipv4[0x%x], port[%u], hostname[%s]\n", \ + my_syslog_info->ipv4_address, my_syslog_info->port, \ + my_syslog_info->header_hostname); \ + break; \ + } \ + })); \ +}while (0) \ +; + + +void printf_ipv4(u32 ad) +{ + u8 a __attribute__((unused)), b __attribute__((unused)), + c __attribute__((unused)), d __attribute__((unused)); + + a = ad>>24; + b = (ad>>16) & 0xFF; + c = (ad>>8) & 0xFF; + d = (ad>>0) & 0xFF; + + PLATFORM_DEBUG_PRINT("%d.%d.%d.%d", a, b, c, d); +} +void cnat_main_db_entry_dump (cnat_main_db_entry_t *db) +{ + PLATFORM_DEBUG_PRINT("Main DB entry at %p, index %ld dst_ip %x\n", + db, db - cnat_main_db, db->dst_ipv4); + /* only dump hash next index if it's non EMPTY */ + if (db->out2in_hash.next != EMPTY || db->in2out_hash.next != EMPTY) + 
PLATFORM_DEBUG_PRINT("out2in hash %u, in2out hash %u\n", + db->out2in_hash.next, + db->in2out_hash.next); + PLATFORM_DEBUG_PRINT("out2in key ipv4 0x%08X, port 0x%04X (%5d), vrf %d, protocol %s\n", + db->out2in_key.k.ipv4, + db->out2in_key.k.port, + db->out2in_key.k.port, + db->out2in_key.k.vrf & CNAT_VRF_MASK, + (db->out2in_key.k.vrf & CNAT_PRO_MASK) == CNAT_UDP ? "UDP" : + ((db->in2out_key.k.vrf & CNAT_PRO_MASK) == CNAT_TCP ? "TCP" : + ((db->in2out_key.k.vrf & CNAT_PRO_MASK) == CNAT_ICMP ? "ICMP" : "PPTP ALG"))); + + PLATFORM_DEBUG_PRINT("in2out key ipv4 0x%08X, port 0x%04X (%5d), vrf %d, protocol %s\n", + db->in2out_key.k.ipv4, + db->in2out_key.k.port, + db->in2out_key.k.port, + db->in2out_key.k.vrf & CNAT_VRF_MASK, + (db->in2out_key.k.vrf & CNAT_PRO_MASK) == CNAT_UDP ? "UDP" : + ((db->in2out_key.k.vrf & CNAT_PRO_MASK) == CNAT_TCP ? "TCP" : + ((db->in2out_key.k.vrf & CNAT_PRO_MASK) == CNAT_ICMP ? "ICMP" : "UNKNOWN"))); + + PLATFORM_DEBUG_PRINT("user %d, user ports (nxt) %d (prev) %d, vrfmap_index 0x%x\n", + db->user_index, db->user_ports.next, db->user_ports.prev, + db->vrfmap_index); + PLATFORM_DEBUG_PRINT("timeout %d \n", db->timeout); + PLATFORM_DEBUG_PRINT("flags 0x%x ", db->flags); + + if (db->flags & CNAT_DB_FLAG_TCP_ACTIVE) { + PLATFORM_DEBUG_PRINT(" TCP_ACTIVE "); + } else if (db->flags & CNAT_DB_FLAG_UDP_ACTIVE) { + PLATFORM_DEBUG_PRINT(" UDP_ACTIVE "); + } else if (db->flags & CNAT_DB_FLAG_STATIC_PORT) { + PLATFORM_DEBUG_PRINT(" STATIC_PORT "); + } + + PLATFORM_DEBUG_PRINT(" ALG dlt0 0x%02X dlt1 0x%02X\n", db->alg.alg_dlt[0], db->alg.alg_dlt[1]); + PLATFORM_DEBUG_PRINT("\n"); + + PLATFORM_DEBUG_PRINT("out2in_pkts: %u\n", db->out2in_pkts); + PLATFORM_DEBUG_PRINT("in2out_pkts: %u\n", db->in2out_pkts); + PLATFORM_DEBUG_PRINT("entry_expires: %u current time: %u\n", db->entry_expires, cnat_current_time); + PLATFORM_DEBUG_PRINT("-------------------------\n"); +} + +void cnat_user_db_entry_dump (cnat_user_db_entry_t *up) +{ + u32 db_entry_index, first_db_entry_index; + cnat_main_db_entry_t *ep; + + PLATFORM_DEBUG_PRINT("User DB entry at %p, index %ld\n", + up, up - cnat_user_db); + PLATFORM_DEBUG_PRINT("translation list head index %u, %u translations portmapindex 0x%x\n", + up->translation_list_head_index, + up->ntranslations, up->portmap_index); + PLATFORM_DEBUG_PRINT("source ipv4 0x%x, source port 0x%x, vrf %d\n", + up->key.k.ipv4, + up->key.k.port, + up->key.k.vrf); + first_db_entry_index = db_entry_index = up->translation_list_head_index; + if (first_db_entry_index != EMPTY) { + PLATFORM_DEBUG_PRINT("Port translation list:\n"); + do { + PLATFORM_DEBUG_PRINT(" [%d]\n", db_entry_index); + ep = cnat_main_db + db_entry_index; + db_entry_index = ep->user_ports.next; + } while (first_db_entry_index != db_entry_index); + } else { + PLATFORM_DEBUG_PRINT("WARNING: empty translation list!\n"); + } + PLATFORM_DEBUG_PRINT("-------------------------\n"); +} + +void cnat_user_db_entry_dump_summary (cnat_user_db_entry_t *up) +{ + u32 db_entry_index, first_db_entry_index; + u32 total_entries = 0; + + PLATFORM_DEBUG_PRINT("User DB entry at %p, index %ld\n", + up, up - cnat_user_db); + PLATFORM_DEBUG_PRINT("translation list head index %u, %u translations portmapindex 0x%x\n", + up->translation_list_head_index, + up->ntranslations, up->portmap_index); + PLATFORM_DEBUG_PRINT("source ipv4 0x%x, source port 0x%x, vrf %d\n", + up->key.k.ipv4, + up->key.k.port, + up->key.k.vrf); + first_db_entry_index = db_entry_index = up->translation_list_head_index; + if (first_db_entry_index != EMPTY) { + 
PLATFORM_DEBUG_PRINT("Port translation list:\n"); + do { + total_entries++; + } while (first_db_entry_index != db_entry_index); + PLATFORM_DEBUG_PRINT("TOTAL_ENTRIES: %d\n", total_entries); + } else { + PLATFORM_DEBUG_PRINT("WARNING: empty translation list!\n"); + } + PLATFORM_DEBUG_PRINT("-------------------------\n"); +} + +/* for internal development and UT only */ +void cnat_db_dump_main_by_index (int argc, unsigned long *argv) +{ + u32 index, i, len; + u32 active_count, scan_count; + + if (argc != 1) { + PLATFORM_DEBUG_PRINT("invalid input %d\n", argc); + return; + } + + index = argv[0]; + + len = vec_len(cnat_main_db); + + active_count = pool_elts(cnat_main_db); + + if (index >= active_count) { + PLATFORM_DEBUG_PRINT("Index %u >= total active entries %u\n", index, active_count); + return; + } + + scan_count = 0; + for (i=0; i< len; i++) { + if(pool_is_free_index(cnat_main_db, i)) continue; + + if (index == scan_count) { + cnat_main_db_entry_dump(cnat_main_db + i); + break; + } + scan_count++; + } +} + +void cnat_db_dump_main (int argc, unsigned long *argv) +{ + cnat_main_db_entry_t *db; + + pool_foreach(db, cnat_main_db, ({ + cnat_main_db_entry_dump(db); + })); +} + +void cnat_db_dump_main_summary (int argc, unsigned long *argv) +{ + cnat_main_db_entry_t *db; + u32 num_entries = 0; + + pool_foreach(db, cnat_main_db, ({ + num_entries++; + })); + + PLATFORM_DEBUG_PRINT("\nNum main entries %d\n", num_entries); +} + +void cnat_db_dump_user (int argc, unsigned long *argv) +{ + cnat_user_db_entry_t *up; + + pool_foreach(up, cnat_user_db, ({ + cnat_user_db_entry_dump(up); + })); +} + +void cnat_db_dump_user_summary (int argc, unsigned long *argv) +{ + cnat_user_db_entry_t *up; + + pool_foreach(up, cnat_user_db, ({ + cnat_user_db_entry_dump_summary(up); + })); +} + +void cnat_db_dump_hashes (int argc, unsigned long *argv) +{ + int i; + + PLATFORM_DEBUG_PRINT("Main DB out2in hash:\n"); + for (i = 0; i < vec_len(cnat_out2in_hash); i++) { + if (cnat_out2in_hash[i].next != EMPTY) { + PLATFORM_DEBUG_PRINT("[%d]: %u\n", i, cnat_out2in_hash[i].next); + } + } + PLATFORM_DEBUG_PRINT("Main DB in2out hash:\n"); + for (i = 0; i < vec_len(cnat_in2out_hash); i++) { + if (cnat_in2out_hash[i].next != EMPTY) { + PLATFORM_DEBUG_PRINT("[%d]: %u\n", i, cnat_in2out_hash[i].next); + } + } + + PLATFORM_DEBUG_PRINT("User hash:\n"); + for (i = 0; i < vec_len(cnat_user_hash); i++) { + if (cnat_user_hash[i].next != EMPTY) { + PLATFORM_DEBUG_PRINT("[%d]: %u\n", i, cnat_user_hash[i].next); + } + } + PLATFORM_DEBUG_PRINT("-------------------------\n"); +} + + +#ifdef OLD_VRFMAP + +void cnat_db_dump_cdb (int argc, unsigned long *argv) +{ + int k; + int verbose=0; + int all = 0; + + if (argc > 0) { + verbose = 1; + } + + if (argc > 1) { + all = 1; + } + + PLATFORM_DEBUG_PRINT ("%d vrfmap vectors \n", vec_len(cnat_portmap_by_vrf)); + + for (k = 0; k < vec_len(cnat_portmap_by_vrf); k++) { + PLATFORM_DEBUG_PRINT("index%d: status %d i_vrf 0x%x o_vrf 0x%x\n", k, + cnat_portmap_by_vrf[k].status, + cnat_portmap_by_vrf[k].i_vrf, + cnat_portmap_by_vrf[k].o_vrf); + cnat_db_dump_address_portmap(verbose, all, + cnat_portmaps[k], + cnat_portmaps_inuse[k]); + } +} + +void cnat_db_dump_i_vrf (int argc, unsigned long *argv) +{ + u32 k; + u32 vrf =0; + int verbose=0; + int all = 0; + + if (!argc) { + PLATFORM_DEBUG_PRINT("need vrf input ,return\n"); + return; + } + + if (argc > 0) { + vrf = argv[0]; + } + + if (argc > 1) { + verbose = 1; + } + + if (argc > 2) { + all = 1; + } + + PLATFORM_DEBUG_PRINT ("%d vrfmap vectors \n", 
vec_len(cnat_portmap_by_vrf)); + + for (k = 0; k < vec_len(cnat_portmap_by_vrf); k++) { + if (cnat_portmap_by_vrf[k].i_vrf == vrf) { + PLATFORM_DEBUG_PRINT("%d: i_vrf 0x%x o_vrf 0x%x\n", k, + cnat_portmap_by_vrf[k].i_vrf, + cnat_portmap_by_vrf[k].o_vrf); + cnat_db_dump_address_portmap(verbose, all, + cnat_portmaps[k], + cnat_portmaps_inuse[k]); + return; + } + } + PLATFORM_DEBUG_PRINT("not found\n"); +} + +void cnat_db_dump_o_vrf (int argc, unsigned long *argv) +{ + u32 k; + int verbose=0; + int all = 0; + u32 vrf =0; + + if (!argc) { + PLATFORM_DEBUG_PRINT("need vrf input ,return\n"); + return; + } + + if (argc > 0) { + vrf = argv[0]; + } + + if (argc > 1) { + verbose = 1; + } + + if (argc > 2) { + all = 1; + } + + PLATFORM_DEBUG_PRINT ("%d vrfmap vectors \n", vec_len(cnat_portmap_by_vrf)); + + for (k = 0; k < vec_len(cnat_portmap_by_vrf); k++) { + if (cnat_portmap_by_vrf[k].o_vrf == vrf) { + PLATFORM_DEBUG_PRINT("index%d: status %d i_vrf 0x%x o_vrf 0x%x\n", k, + cnat_portmap_by_vrf[k].status, + cnat_portmap_by_vrf[k].i_vrf, + cnat_portmap_by_vrf[k].o_vrf); + cnat_db_dump_address_portmap(verbose, all, + cnat_portmaps[k], + cnat_portmaps_inuse[k]); + return; + } + } + PLATFORM_DEBUG_PRINT("not found\n"); +} +#endif + +#ifdef TOBE_PORTED +/* This does not seem to be used */ +void cnat_db_mem_usage_cmd (int argc, unsigned long *argv) +{ + pool_header_t * p; + _VEC *_v; + u32 bitmap_bytes=0, free_indices_bytes=0, vec_bytes=0, total_bytes=0; + + if (cnat_main_db) { + p = pool_header(cnat_main_db); + if (p->free_bitmap) { + _v = _vec_find(p->free_bitmap); + bitmap_bytes = _v->alen; + } else { + bitmap_bytes = 0; + } + if (p->free_indices) { + _v = _vec_find(p->free_indices); + free_indices_bytes = _v->alen; + } else { + free_indices_bytes = 0; + } + _v = _vec_find(cnat_main_db); + vec_bytes = _v->alen; + } else { + vec_bytes = 0; + } + + total_bytes = bitmap_bytes + free_indices_bytes + vec_bytes; + + PLATFORM_DEBUG_PRINT ("Main DB: %d total bytes, %d bitmap, %d indices, %d vec\n", + total_bytes, bitmap_bytes, free_indices_bytes, vec_bytes); + PLATFORM_DEBUG_PRINT (" vector length %d\n", vec_len(cnat_main_db)); + + if (cnat_user_db) { + p = pool_header(cnat_user_db); + if (p->free_bitmap) { + _v = _vec_find(p->free_bitmap); + bitmap_bytes = _v->alen; + } else { + bitmap_bytes = 0; + } + if (p->free_indices) { + _v = _vec_find(p->free_indices); + free_indices_bytes = _v->alen; + } else { + free_indices_bytes = 0; + } + _v = _vec_find(cnat_user_db); + vec_bytes = _v->alen; + } else { + vec_bytes = 0; + } + + total_bytes = bitmap_bytes + free_indices_bytes + vec_bytes; + + PLATFORM_DEBUG_PRINT ("User DB: %d total bytes, %d bitmap, %d indices, %d vec\n", + total_bytes, bitmap_bytes, free_indices_bytes, vec_bytes); + PLATFORM_DEBUG_PRINT (" vector length %d\n", vec_len(cnat_user_db)); + + _v = _vec_find(cnat_out2in_hash); + PLATFORM_DEBUG_PRINT("out2in hash: %d total bytes\n", _v->alen); + + _v = _vec_find(cnat_in2out_hash); + PLATFORM_DEBUG_PRINT("in2out hash: %d total bytes\n", _v->alen); +} +#endif + +static void print_server_ip_address (vlib_main_t *vm, u32 ip) +{ + unsigned char bytes[4]; + bytes[0] = ip & 0xFF; + bytes[1] = (ip >> 8) & 0xFF; + bytes[2] = (ip >> 16) & 0xFF; + bytes[3] = (ip >> 24) & 0xFF; + vlib_cli_output(vm, "\tIP Address %d.%d.%d.%d\n", bytes[0], bytes[1], bytes[2], bytes[3]); +} + +void cnat_nfv9_show_collector (vlib_main_t *vm, cnat_nfv9_logging_info_t *my_nfv9_logging_info) +{ + nfv9_server_info_t *server = nfv9_server_info_pool + + my_nfv9_logging_info->server_index; + + 
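+    /*
+     * server points at the shared per-collector transport settings
+     * (IPv4 address, port, refresh and timeout rates) held in
+     * nfv9_server_info_pool for this logging entry.
+     */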
vlib_cli_output(vm,"\tVRF - 0x%x - %s\n", my_nfv9_logging_info->i_vrf, + my_nfv9_logging_info->deleted?"DELETED":"ACTIVE"); + print_server_ip_address(vm, clib_net_to_host_u32(server->ipv4_address)); + vlib_cli_output(vm,"\tIP port %d\n", server->port); + vlib_cli_output(vm,"\tTimeout %d\n", server->timeout_rate); + vlib_cli_output(vm,"\tRefresh %d\n", server->refresh_rate); + vlib_cli_output(vm,"\tMax PkSz %d\n", my_nfv9_logging_info->max_length_minus_max_record_size); +} + +void cnat_db_dump_policy (int argc, unsigned long *argv) +{ + + PLATFORM_CNAT_DB_DUMP_POLICY_PRINT(); + + if (cnat_nfv9_global_info.cnat_nfv9_init_done) { + if (cnat_nfv9_global_info.cnat_nfv9_global_collector_index != EMPTY) { + cnat_nfv9_logging_info_t *my_nfv9_logging_info; + nfv9_server_info_t *server __attribute__((unused)); + + my_nfv9_logging_info = cnat_nfv9_logging_info_pool + + cnat_nfv9_global_info.cnat_nfv9_global_collector_index; + server = nfv9_server_info_pool + + my_nfv9_logging_info->server_index; + + PLATFORM_DEBUG_PRINT("NFv9 logging ip 0x%x port 0x%x refresh-rate %d timeout %d\n", + server->ipv4_address, + server->port, + server->refresh_rate, + server->timeout_rate); + PLATFORM_DEBUG_PRINT("NFv9 path_mtu = %d\n", + my_nfv9_logging_info->max_length_minus_max_record_size); + } else { + PLATFORM_DEBUG_PRINT("NFv9 global logging is not configured\n"); + } + } else { + PLATFORM_DEBUG_PRINT("NFv9 LOGGING is not configured\n"); + } + +} + +#ifdef OLD_VRFMAP +void cnat_show_cdb (int verbose) +{ + int k, l, i; + for (i = 0; i < vec_len(cnat_portmap_by_vrf); i++) { + PLATFORM_DEBUG_PRINT("i_vrf %d : o_vrf %d\n", + cnat_portmap_by_vrf[i].i_vrf, + cnat_portmap_by_vrf[i].o_vrf); + } + + PLATFORM_DEBUG_PRINT("port limit %d\n", cnat_main_db_max_ports_per_user); + + PLATFORM_DEBUG_PRINT ("%d portmap vectors\n", vec_len(cnat_portmaps)); + + for (k = 0; k < vec_len(cnat_portmaps); k++) { + cnat_portmap_t *pm; + u16 *inuse; + pm = cnat_portmaps[k]; + inuse = cnat_portmaps_inuse[k]; + for (l = 0; l < vec_len(pm); l++) { + if (inuse[l] || verbose ) { + u32 net_address; + net_address = + spp_host_to_net_byte_order_32((pm+l)->ipv4_address); + printf_ipv4(net_address); + PLATFORM_DEBUG_PRINT (": %d inuse\n", inuse[l]); + if (verbose && inuse[l]) { + cnat_portmap_dump (pm+l, inuse+l); + } + } + } + } +} +#endif + + + +/* v2 show command */ +void cnat_show_address_portmap_sumary (cnat_portmap_v2_t *pm) +{ + cnat_portmap_v2_t *my_pm =0; + u32 first_address = 0; + u32 second_address = 0; + u32 last_address = 0; + u32 i, pm_len; + + if ((pm_len = vec_len(pm))) { + PLATFORM_DEBUG_PRINT("%d portmap in this list 0x%lx\n", + pm_len, (u32)pm); + for (i = 0; i < pm_len; i++) { + my_pm = pm + i; + if (!first_address) { + first_address = my_pm->ipv4_address; + } else if (!second_address) { + second_address = my_pm->ipv4_address; + } + last_address = my_pm->ipv4_address; + } + + if (first_address) { + PLATFORM_DEBUG_PRINT("1. 0x%08x", first_address); + } + if (second_address) { + PLATFORM_DEBUG_PRINT(", 2. 0x%08x", second_address); + } + + if ((last_address != first_address) && + (last_address != second_address)) { + PLATFORM_DEBUG_PRINT(", ....., %d. 
0x%08x", pm_len, last_address); + } + PLATFORM_DEBUG_PRINT("\n"); + } else { + PLATFORM_DEBUG_PRINT("ZERO POOL ADDRESSES in this list 0x%x \n", (u32)pm); + } +} + + +void cnat_show_address_portmap (int verbose, int all, + cnat_portmap_v2_t *pm, u16 port_limit) +{ + cnat_portmap_v2_t *my_pm =0; + u32 i, pm_len; + + pm_len = vec_len(pm); + if (!all) { + cnat_show_address_portmap_sumary(pm); + } else { + PLATFORM_DEBUG_PRINT("%d portmap in this list 0x%x \n", pm_len, (u32)pm); + } + + for (i = 0; i < pm_len; i++) { + + my_pm = pm + i; + if (all) { + PLATFORM_DEBUG_PRINT("pm:0x%x ip address:0x%x del_time 0x%x inuse:%d\n", + (u32)my_pm, my_pm->ipv4_address, my_pm->delete_time, my_pm->inuse); + } else if (my_pm->inuse) { + PLATFORM_DEBUG_PRINT("pm:0x%x ip address:0x%x inuse:%d\n", + (u32)my_pm, my_pm->ipv4_address, my_pm->inuse); + } + + if (verbose && (my_pm->inuse)) { + if(PREDICT_FALSE(!port_limit)) { + cnat_portmap_dump_v2 (my_pm, cnat_main_db_max_ports_per_user); + } + else { + cnat_portmap_dump_v2 (my_pm, port_limit); + } + } + } + + PLATFORM_DEBUG_PRINT("\n"); +} + + +void cnat_show_cdb_v2 (int verbose, int all) +{ + cnat_vrfmap_t *my_vrfmap = 0; + cnat_portmap_v2_t *pm =0; + PLATFORM_DEBUG_PRINT("port limit %d\n", cnat_main_db_max_ports_per_user); + PLATFORM_DEBUG_PRINT("total address pool allocated %d\n", total_address_pool_allocated); + PLATFORM_DEBUG_PRINT("icmp rate limit %d (per core %d)\n", + cnat_main_db_icmp_rate_limit, cnat_main_db_icmp_rate_limit_core); + PLATFORM_DEBUG_PRINT("dynamic port range start %d\n", cnat_static_port_range); + if (pptp_cfg.enable == PPTP_DISABLED) { + PLATFORM_DEBUG_PRINT("PPTP alg disabled \n"); + } else { + PLATFORM_DEBUG_PRINT("PPTP alg enabled \n"); + } + + if (ftp_alg_enabled) { + PLATFORM_DEBUG_PRINT("FTP alg enabled\n"); + } else { + PLATFORM_DEBUG_PRINT("FTP alg disabled\n"); + } + + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + CNAT_MY_VRFMAP_PRINT + CNAT_MY_LOGGING_INFO_PRINT + PLATFORM_DEBUG_PRINT("per vrf port limit %d\n", my_vrfmap->port_limit); + pm = my_vrfmap->portmap_list; + cnat_show_address_portmap(verbose, all, pm, my_vrfmap->port_limit); + + })); +} + + +void cnat_show_cdb_command_v2(int argc, unsigned long *argv) +{ + int verbose=0; + int all = 0; + + if (argc > 0) { + verbose = 1; + } + + if (argc > 1) { + all = 1; + } + + cnat_show_cdb_v2(verbose, all); +} + +void cnat_show_ivrf_command_v2 (int argc, unsigned long *argv) +{ + u32 vrf =0; + int verbose=0; + int all = 0; + cnat_vrfmap_t *my_vrfmap = 0; + cnat_portmap_v2_t *pm =0; + + if (!argc) { + PLATFORM_DEBUG_PRINT("need vrf input ,return\n"); + return; + } + if (argc > 0) { + vrf = argv[0]; + } + if (argc > 1) { + verbose = 1; + } + if (argc > 2) { + all = 1; + } + PLATFORM_DEBUG_PRINT ("%lld vrfmap vectors \n", pool_elts(cnat_map_by_vrf)); + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + if (my_vrfmap->i_vrf == vrf) { + CNAT_MY_VRFMAP_PRINT + pm = my_vrfmap->portmap_list; + cnat_show_address_portmap(verbose, all, pm,my_vrfmap->port_limit); + return; + } + })); + PLATFORM_DEBUG_PRINT("not found\n"); +} + +void cnat_show_ovrf_command_v2 (int argc, unsigned long *argv) +{ + u32 not_found =1; + u32 vrf =0; + int verbose=0; + int all = 0; + cnat_vrfmap_t *my_vrfmap = 0; + cnat_portmap_v2_t *pm =0; + + if (!argc) { + PLATFORM_DEBUG_PRINT("need vrf input ,return\n"); + return; + } + if (argc > 0) { + vrf = argv[0]; + } + if (argc > 1) { + verbose = 1; + } + if (argc > 2) { + all = 1; + } + PLATFORM_DEBUG_PRINT("%d vrfmap vectors \n", pool_elts(cnat_map_by_vrf)); + pool_foreach 
(my_vrfmap, cnat_map_by_vrf, ({ + if (my_vrfmap->o_vrf == vrf) { + CNAT_MY_VRFMAP_PRINT + pm = my_vrfmap->portmap_list; + cnat_show_address_portmap(verbose, all, pm,my_vrfmap->port_limit); + not_found = 0; + } + })); + if (not_found) { + PLATFORM_DEBUG_PRINT("not found\n"); + } +} + +void cnat_timeout_db_entry_dump (cnat_timeout_db_entry_t *up) +{ + u32 db_entry_index __attribute__((unused)), + first_db_entry_index __attribute__((unused)); + + PLATFORM_DEBUG_PRINT("Timeout DB entry at index %ld\n", up - cnat_timeout_db); + PLATFORM_DEBUG_PRINT("Desnt key 0x%16llx\n", up->t_key.timeout_key.key64); + PLATFORM_DEBUG_PRINT("Timeout value %d\n", up->t_key.timeout_value); + PLATFORM_DEBUG_PRINT("Hash Next 0x%x\n", up->t_hash.next); + +} + +void cnat_db_dump_timeout () +{ + cnat_timeout_db_entry_t *up; + pool_header_t *h; + u32 used __attribute__((unused)), free __attribute__((unused)); + + h = pool_header(cnat_timeout_db); + free = vec_len(h->free_indices); + used = (vec_len(cnat_timeout_db) - free); + + PLATFORM_DEBUG_PRINT("Timeout DB Free %d, Used %d\n",free, used); + + pool_foreach(up, cnat_timeout_db, ({ + cnat_timeout_db_entry_dump(up); + })); +} + diff --git a/vnet/vnet/vcgn/cnat_show_api.h b/vnet/vnet/vcgn/cnat_show_api.h new file mode 100644 index 00000000000..5904c7e2dd6 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_show_api.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CNAT_SHOW_API_H__ +#define __CNAT_SHOW_API_H__ + +typedef struct _spp_api_cnat_v4_show_inside_entry_req { + u16 _spp_msg_id; + u16 vrf_id; + u32 ipv4_addr; + u16 start_port; + u16 end_port; + u8 flags; + u8 all_entries; + u8 protocol; +} spp_api_cnat_v4_show_inside_entry_req_t; + +typedef struct _spp_api_cnat_v4_show_outside_entry_req { + u16 _spp_msg_id; + u16 vrf_id; + u32 ipv4_addr; + u16 start_port; + u16 end_port; + u8 flags; + u8 protocol; +} spp_api_cnat_v4_show_outside_entry_req_t; + + +#endif diff --git a/vnet/vnet/vcgn/cnat_show_response.h b/vnet/vnet/vcgn/cnat_show_response.h new file mode 100644 index 00000000000..bec1bd97245 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_show_response.h @@ -0,0 +1,580 @@ +/* + *------------------------------------------------------------------ + * cnat_show_response.h show command response structs + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_SHOW_RESPONSE_H__ +#define __CNAT_SHOW_RESPONSE_H__ + +/* + * Flags indicating the type of translation entry + */ +#define CNAT_TRANSLATION_ENTRY_ALL 0x0 +#define CNAT_TRANSLATION_ENTRY_STATIC 0x1 +#define CNAT_TRANSLATION_ENTRY_ALG 0x2 +#define CNAT_TRANSLATION_ENTRY_DYNAMIC 0x4 + +/* for PCP support */ +#define CNAT_TRANSLATION_ENTRY_PCPI_DYNAMIC 0x08 +#define CNAT_TRANSLATION_ENTRY_PCPE_DYNAMIC 0x10 + +#define MAX_NODE_NAME_LEN 18 +#define MAX_CTR_NAME_LEN 10 + +/* + * show translation entry response structures + */ +typedef struct { + u16 call_id; + u16 cnat_call_id; /* mapped call Id */ + u16 dst_call_id; /* dest call id */ +} cnat_v4_show_gre_entry; + +typedef struct { + u16 msg_id; + u16 rc; /* o/p parameter. */ + u16 num_entries; /* Number of entries sent as output */ + u16 vrf_id; /* vrf id */ + u32 pns_ip; + cnat_v4_show_gre_entry entries[0]; +} cnat_v4_show_gre_entry_resp; + +/* + * show translation entry response structures + */ +typedef struct { + u32 ipv4_addr; + u16 src_port; + u16 cnat_port; /* port which replaced the src port */ + u8 protocol; + u8 pad; + u16 flags; + u16 nsessions; + u32 in2out_packets; + u32 out2in_packets; +} cnat_v4_show_translation_entry; + +typedef struct { + u16 msg_id; + u8 rc; /* o/p parameter. */ + u8 num_entries; /* Number of entries sent as output */ + u16 vrf_id; /* vrf id */ + u16 pad; + cnat_v4_show_translation_entry entries[0]; +} cnat_v4_show_entry_resp; + +/* + * show free/used ipv4 address/port response structure + */ +typedef struct { + u32 ip_addr; + u32 free_used_ports; +} cnat_v4_show_free_used_entry; + +typedef struct { + u16 msg_id; + u8 rc; + u8 count; + u32 max_ports; + cnat_v4_show_free_used_entry entry_list[0]; +} cnat_v4_show_free_used_entry_resp; + +/* + * Node name to id mapping + */ +typedef struct { + u8 node_id; + u8 pad; + char node_name[MAX_NODE_NAME_LEN]; +} cnat_statistics_node_name; + +typedef struct { + u16 msg_id; + u8 rc; + u8 num_nodes; + cnat_statistics_node_name node_name_array[0]; +} cnat_statistics_node_name_mapping_resp; + +/* + * Counter name to id mapping + */ +typedef struct { + u8 node_id; + u8 counter_id; + char counter_name[MAX_CTR_NAME_LEN]; +} cnat_statistics_counter_name; + +typedef struct { + u16 msg_id; + u8 rc; + u8 num_counters; + cnat_statistics_counter_name counter_name_array[0]; +} cnat_statistics_counter_name_mapping_resp; + + +/* + * Node name to id mapping + */ +typedef struct { + u16 msg_id; + u8 rc; + u8 num_nodes; + u32 pad; + u64 counters [0]; +} cnat_statistics_counter_values; + +/* + * Summary Stats + */ +typedef struct { + u32 eaddr; + u32 ports_used; +} pool_address_usage_t; + +typedef struct { + u16 msg_id; + u8 rc; + u8 pad; + u16 max_pkt_size; + u16 pool_address_copied; + u32 active_translations; + u32 translation_create_rate; + u32 translation_delete_rate; + u32 in2out_forwarding_rate; + u32 out2in_forwarding_rate; + u32 dummy; + u64 in2out_drops_port_limit_exceeded; + u64 in2out_drops_system_limit_reached; + u64 in2out_drops_resource_depletion; + u64 no_translation_entry_drops; + u64 pptp_active_tunnels; + u64 pptp_active_channels; + u64 pptp_ctrlmsg_drops; + u32 no_sessions; + + u32 pool_address_totally_free; + u32 pool_address_used; /* The following array size will be lesser of + (pool_address_used, 200) */ + u32 num_subscribers; + u64 drops_sessiondb_limit_exceeded; + u64 in2out_drops_src_ip_no_config; // for deterministic nat on brahmos + pool_address_usage_t pool_address_usage[0]; 
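+    /*
+     * pool_address_usage[] is a trailing flexible array; per the note above
+     * it carries min(pool_address_used, 200) elements, so the full reply
+     * occupies sizeof(cnat_show_statistics_summary_resp) plus that many
+     * pool_address_usage_t entries (a sizing sketch, not taken from the
+     * sender code).
+     */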
+} cnat_show_statistics_summary_resp; + + +typedef struct { + u16 msg_id; + u8 rc; + u8 pad; + u64 o2i_tcp_seq_mismatch_drop; + u64 o2i_tcp_seq_mismatch; + u64 o2i_sessions_created; + u64 o2i_end_point_filter_drop; +} cnat_show_counters_summary_resp; + + +typedef struct { + u16 msg_id; + u8 rc; + u8 pad; + + /* + * XLAT statistics + */ + u64 v6_to_v4_tcp_input_count; + u64 v6_to_v4_tcp_non_translatable_drop_count; + u64 v6_to_v4_tcp_invalid_next_hdr_drop_count; + u64 v6_to_v4_tcp_no_db_drop_count; + u64 v6_to_v4_tcp_output_count; + + u64 v4_to_v6_tcp_input_count; + u64 v4_to_v6_tcp_no_db_drop_count; + u64 v4_to_v6_tcp_output_count; + + u64 v6_to_v4_udp_input_count; + u64 v6_to_v4_udp_non_translatable_drop_count; + u64 v6_to_v4_udp_invalid_next_hdr_drop_count; + u64 v6_to_v4_udp_no_db_drop_count; + u64 v6_to_v4_udp_output_count; + + u64 v4_to_v6_udp_input_count; + u64 v4_to_v6_udp_no_db_drop_count; + u64 v4_to_v6_udp_output_count; + u64 v4_to_v6_udp_frag_crc_zero_drop_count; + u64 v4_to_v6_udp_crc_zero_recycle_sent_count; + u64 v4_to_v6_udp_crc_zero_recycle_drop_count; + + u64 v6_to_v4_icmp_qry_input_count; + u64 v6_to_v4_icmp_no_db_drop_count; + u64 v6_to_v4_icmp_frag_drop_count; + u64 v6_to_v4_icmp_invalid_next_hdr_drop_count; + u64 v6_to_v4_icmp_non_translatable_drop_count; + u64 v6_to_v4_icmp_non_translatable_fwd_count; + u64 v6_to_v4_icmp_unsupported_type_drop_count; + u64 v6_to_v4_icmp_err_output_count; + u64 v6_to_v4_icmp_qry_output_count; + + u64 v4_to_v6_icmp_qry_input_count; + u64 v4_to_v6_icmp_no_db_drop_count; + u64 v4_to_v6_icmp_frag_drop_count; + u64 v4_to_v6_icmp_unsupported_type_drop_count; + u64 v4_to_v6_icmp_err_output_count; + u64 v4_to_v6_icmp_qry_output_count; + + u64 v6_to_v4_subsequent_frag_input_count; + u64 v6_to_v4_subsequent_frag_non_translatable_drop_count; + u64 v6_to_v4_subsequent_frag_invalid_next_hdr_drop_count; + u64 v6_to_v4_subsequent_frag_no_db_drop_count; + u64 v6_to_v4_subsequent_frag_output_count; + + u64 v4_to_v6_subsequent_frag_input_count; + u64 v4_to_v6_subsequent_frag_no_db_drop_count; + u64 v4_to_v6_subsequent_frag_output_count; + + u64 v4_to_v6_subsequent_frag_drop_count; + u64 v4_to_v6_subsequent_frag_throttled_count; + u64 v4_to_v6_subsequent_frag_timeout_drop_count; + u64 v4_to_v6_subsequent_frag_tcp_input_count; + u64 v4_to_v6_subsequent_frag_udp_input_count; + u64 v4_to_v6_subsequent_frag_icmp_input_count; + + u64 v6_to_v4_options_input_count; + u64 v6_to_v4_options_drop_count; + u64 v6_to_v4_options_forward_count; + u64 v6_to_v4_options_no_db_drop_count; + u64 v6_to_v4_unsupp_proto_count; + + u64 v4_to_v6_options_input_count; + u64 v4_to_v6_options_drop_count; + u64 v4_to_v6_options_forward_count; + u64 v4_to_v6_options_no_db_drop_count; + u64 v4_to_v6_unsupp_proto_count; + + u64 v4_icmp_gen_count; + u64 v6_icmp_gen_count; +} xlat_show_statistics_summary_resp; + +typedef struct { + u16 msg_id; + u8 rc; + u8 pad; + /* Total v4 packets to BR */ + u64 v4_to_v6_input_total_count; + /* Total v4 tunneled packets to BR */ + u64 v4_to_v6_41_input_total_count; + /* proto 41 packets without minimum, of 40, v6 payload */ + u64 v4_to_v6_41_insuff_v6payld_count; + /* total proto 41 packets being considered for decap */ + u64 v4_to_v6_41_valid_count; + /* proto 41 packets that failed security check*/ + u64 v4_to_v6_41_sec_check_fail_count; + /* packets with no active db entry */ + u64 v4_to_v6_no_db_drop_count; + /* proto 41 packets actually getting decapped */ + u64 v4_to_v6_41_decap_count; + /* total v4 packets which are neither icmp nor 41 */ + u64 
v4_to_v6_unsupported_protocol_count; + /* v4 tunneled packets with invalid v6 source address */ + u64 v4_to_v6_41_invalid_v6_source; + /* total icmpv4 packets destined to BR */ + u64 v4_forus_icmp_input_count; + /* total icmpv4 echo replies by BR */ + u64 v4_icmp_reply_count; + /* total icmpv4 error messages translated to icmpv6 by BR */ + u64 v4_to_v6_icmp_translation_count; + /* total packets with icmpv4 type/code which are not supported by BR */ + u64 v4_icmp_unsupported_count; + /* total icmpv4 packets which are rate-limited by BR */ + u64 v4_icmp_throttled_count; + /* total ICMPv4 error messages which could not be translated */ + u64 v4_icmp_non_translatable_drop_count; + + /* ipv4 defrag stats */ + u64 v4_to_v6_frag_input_count; + u64 v4_to_v6_frag_sec_check_fail_count; + u64 v4_to_v6_frag_reassem_count; + u64 v4_to_v6_frag_timeout_drop_count; + u64 v4_to_v6_frag_icmp_input_count; + u64 v4_to_v6_frag_41_insuff_v6payld_count; + u64 v4_to_v6_frag_no_db_drop_count; + u64 v4_to_v6_frag_unsupported_protocol_count; + u64 v4_to_v6_frag_41_invalid_v6_source; + u64 v4_to_v6_frag_throttled_count; + u64 v4_to_v6_frag_dup_count; + u64 v4_to_v6_frag_reassem_frag_count; + u64 v4_to_v6_frag_disable_count; + u64 v4_to_v6_frag_drop_count; + + /* total v6 packets input to BR */ + u64 v6_to_v4_total_input_count; + /* v6 packets with no active db entry */ + u64 v6_to_v4_no_db_drop_count; + /* forus v6 packets with next header other than icmpv6 */ + u64 v6_to_v4_forus_unsupp_proto_count; + /* total v6 packets that got tunneled */ + u64 v6_to_v4_encap_count; + /* total icmpv6 packets destined to BR */ + u64 v6_forus_icmp_input_count; + /* total icmpv6 echo replies by BR */ + u64 v6_icmp_reply_count; + /* total icmpv6 PTB messages generated by BR */ + u64 v6_ptb_generated_count; + /* total ipv6 packets for which PTBv6 was NOT generated by BR */ + u64 v6_ptb_not_generated_drop_count; + /* total icmpv6 Neighbor Advertisements generated by BR */ + u64 v6_na_generated_count; + /* total icmpv6 TTL expiry messages generated by BR */ + u64 v6_ttl_expiry_generated_count; + /* total ICMPv6 fragments, which are dropped by BR */ + u64 v6_to_v4_frag_icmp_input_count; + /* total packets with icmpv6 type/code which are not supported by BR */ + u64 v6_icmp_unsupported_count; + /* total icmpv6 packets which are rate-limited by BR */ + u64 v6_icmp_throttled_count; +} v6rd_show_statistics_summary_resp; + +typedef struct { + u16 msg_id; + u8 rc; + u8 pad; + + /* Total Incoming Count */ + u64 v4_input_count; + /* Total Drop Count */ + u64 v4_drop_count; + /* Total Output Count */ + u64 v4_to_v6_output_count; + /* TCP Incoming Count */ + u64 v4_tcp_input_count; + /* TCP Output Count */ + u64 v4_tcp_output_count; + /* UDP Incoming Count */ + u64 v4_udp_input_count; + /* UDP Output Count */ + u64 v4_udp_output_count; + /* ICMPv4 Incoming Count */ + u64 v4_icmp_input_count; + /* ICMPv4 Output Count */ + u64 v4_to_v6_icmp_output_count; + /* Invalid UIDB Drop Count */ + u64 v4_invalid_uidb_drop_count; + /* NoDb Drop Count */ + u64 v4_no_db_drop_count; + /* TTL Expire Drop Count */ + u64 v4_ttl_expire_drop_count; + /* Invalid IP Destination Drop Count */ + u64 v4_invalid_destination_prefix_drop_count; + /* Packet Execeeding Path MTU Drop Count */ + u64 v4_path_mtu_exceed_count; + /* Unsupported Protocol Drop Count */ + u64 v4_invalid_proto_type_drop_count; + /* ICMPv4 Generated for TTL Expire Count */ + u64 v4_ttl_expiry_generated_count; + /* ICMPv4 Generated for Error Count */ + u64 v4_icmp_error_gen_count; + /* ICMPv4 Packets 
Rate-Limited Count */ + u64 v4_icmp_throttled_drop_count; + /* TCP MSS Changed Count */ + u64 v4_tcp_mss_changed_count; + + /* Total Incoming Count */ + u64 v6_input_count; + /* Total Drop Count */ + u64 v6_drop_count; + /* Total Output Count */ + u64 v6_to_v4_output_count; + /* TCP Incoming Count */ + u64 v6_tcp_input_count; + /* TCP Output Count */ + u64 v6_tcp_output_count; + /* UDP Incoming Count */ + u64 v6_udp_input_count; + /* UDP Output Count */ + u64 v6_udp_output_count; + /* ICMPv4 Incoming Count */ + u64 v6_icmpv4_input_count; + /* ICMPv4 Output Count */ + u64 v6_icmpv4_output_count; + /* Invalid UIDB Drop Count */ + u64 v6_invalid_uidb_drop_count; + /* NoDb Drop Count */ + u64 v6_no_db_drop_count; + /* TTL Expire Drop Count */ + u64 v6_ttl_expire_drop_count; + /* Invalid IPv6 Destination Drop Count */ + u64 v6_invalid_destination_drop_count; + /* Invalid Source Prefix Drop Count */ + u64 v6_invalid_source_prefix_drop_count; + /* Unsupported Protocol Drop Count */ + u64 v6_invalid_proto_type_drop_count; + /* ICMPv6 Input Count */ + u64 v6_icmp_input_count; + /* ICMPv6 Invalid UIDB Drop Count */ + u64 v6_icmp_invalid_uidb_drop_count; + /* ICMPv6 NoDb Drop Count */ + u64 v6_icmp_no_db_drop_count; + /* ICMPv6 TTL Expire Drop Count */ + u64 v6_icmp_ttl_expire_drop_count; + /* ICMPv6 Invalid IPv6 Destination Drop Count */ + u64 v6_icmp_invalid_destination_drop_count; + /* ICMPv6 Unsupported Type Drop Count */ + u64 v6_icmp_unsupported_type_drop_count; + /* ICMPv6 Invalid NxtHdr Drop Count*/ + u64 v6_icmp_unsupported_nxthdr_drop_count; + /* ICMPv6 Frag Drop Count */ + u64 v6_icmp_frag_drop_count; + /* ICMPv6 Forus Count */ + u64 v6_forus_icmp_input_count; + /* ICMPv6 Echo Response Received Count */ + u64 v6_received_echo_response_count; + /* ICMPv6 Echo Replies Count */ + u64 v6_echo_reply_count; + /* ICMPv6 Translated to ICMPV4 Output Count*/ + u64 v6_to_v4_icmp_output_count; + /* ICMPv6 Generated for TTL Expire Count */ + u64 v6_ttl_expiry_generated_count; + /* ICMPv6 Generated for Error Count */ + u64 v6_icmp_error_gen_count; + /* ICMPv6 Packets Rate-Limited Count */ + u64 v6_icmp_throttled_drop_count; + /* TCP MSS Changed Count */ + u64 v6_tcp_mss_changed_count; + + /*Total Input Count*/ + u64 v4_to_v6_frag_input_count; + /*Total Drop Count*/ + u64 v4_to_v6_frag_drop_count; + /*Reassembled Output Count*/ + u64 v4_to_v6_frag_reassem_count; + + /*TCP Input Count*/ + u64 v4_to_v6_frag_tcp_input_count; + /*UDP Input Count*/ + u64 v4_to_v6_frag_udp_input_count; + /*ICMPv4 Input Count*/ + u64 v4_to_v6_frag_icmp_input_count; + + /*Invalid UIDB Drop Count */ + u64 v4_to_v6_frag_invalid_uidb_drop_count; + /*NoDb Drop Count*/ + u64 v4_to_v6_frag_no_db_drop_count; + /*Unsupported Protocol Drop Count*/ + u64 v4_to_v6_frag_invalid_proto_type_drop_count; + /*Throttled Count*/ + u64 v4_to_v6_frag_throttled_count; + /*Timeout Drop Count*/ + u64 v4_to_v6_frag_timeout_drop_count; + /*Duplicates Drop Count*/ + u64 v4_to_v6_frag_dup_count; + + /*Total Input Count*/ + u64 v6_to_v4_inner_frag_input_count; + /*Total Drop Count*/ + u64 v6_to_v4_inner_frag_drop_count; + /*Total Output Count*/ + u64 v6_to_v4_inner_frag_output_count; + + /*TCP Input Count*/ + u64 v6_to_v4_inner_frag_tcp_input_count; + /*UDP Input Count*/ + u64 v6_to_v4_inner_frag_udp_input_count; + /*ICMPv4 Input Count*/ + u64 v6_to_v4_inner_frag_icmp_input_count; + + /*Invalid Source Prefix Drop Count*/ + u64 v6_to_v4_inner_frag_invalid_source_prefix_drop_count; + /*Unsupported Protocol Drop Count*/ + u64 
v6_to_v4_inner_frag_invalid_proto_type_drop_count; + /*Throttled Count*/ + u64 v6_to_v4_inner_frag_throttled_count; + /*Timeout Drop Count*/ + u64 v6_to_v4_inner_frag_timeout_drop_count; + /*Duplicates Drop Count*/ + u64 v6_to_v4_inner_frag_dup_count; + + /*ICMPv6 Generated for Error Count */ + u64 v6_to_v4_inner_frag_icmp_error_gen_count; + /*ICMPv6 Packets Rate-Limited Count */ + u64 v6_to_v4_inner_frag_icmp_throttled_drop_count; + + /*TCP MSS Changed Count */ + u64 v6_to_v4_inner_frag_tcp_mss_changed_count; + +} mape_show_statistics_summary_resp; + +/* + * The following are the command types for Generic Command cases + */ +#define CNAT_DEBUG_GENERIC_COMMAND_READ_MEM 1 +#define CNAT_DEBUG_GENERIC_COMMAND_WRITE_MEM 2 +#define CNAT_DEBUG_GENERIC_COMMAND_DB_SUMMARY 3 +#define CNAT_DEBUG_GENERIC_COMMAND_USER_DB_PM 4 +#define CNAT_DEBUG_GET_CGN_DB_SUMMARY 5 + +typedef enum { + CNAT_DEBUG_GENERIC_COMMAND_DUMP_POLICY, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_MAIN_DB, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_USER_DB, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_HASHES_DB, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_VRF_MAP, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_SUMMARY_DB, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_STATS, + CNAT_DEBUG_GENERIC_COMMAND_CLEAR_STATS, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_NODE_COUNTER, + CNAT_DEBUG_GENERIC_COMMAND_CLEAR_NODE_COUNTER, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_CNAT_COUNTER, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_VA, + CNAT_DEBUG_GENERIC_COMMAND_SHOW_CONFIG, + CNAT_DEBUG_GENERIC_COMMAND_SHOW_NFV9, + CNAT_DEBUG_GENERIC_COMMAND_SHOW_IVRF, + CNAT_DEBUG_GENERIC_COMMAND_SHOW_OVRF, + CNAT_DEBUG_SPP_LOG, + CNAT_DEBUG_GENERIC_COMMAND_DEBUG_OPTIONS, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_DEBUG_LEVELS, + CNAT_DEBUG_GENERIC_COMMAND_DEBUG_FLAGS, + CNAT_READ_TEMP_SENSORS, + CNAT_BLOCK_OCTEON_SENSOR_READ, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_MAIN_DB_SUMMARY, + CNAT_DEBUG_GENERIC_COMMAND_DUMP_USER_DB_SUMMARY, + CNAT_DEBUG_DUMP_6RD_STATS, + CNAT_DEBUG_TIMEOUT_DB_SUMMARY, + CNAT_NAT64_STFUL_DEBUG_COMMAND, + CNAT_DEBUG_SET_BULK_SIZE, + CNAT_DEBUG_SHOW_BULK_STAT, + CNAT_DEBUG_CLEAR_BULK_STAT, + CNAT_DEBUG_SHOW_BULK_ALLOC, + CNAT_DEBUG_NAT64, + CNAT_DEBUG_NAT44_IN2OUT_FRAG_STATS, +} cnat_debug_dump_type_t; + +typedef enum { + CNAT_DEBUG_FLAG_UDP_INSIDE_CHECKSUM_MODIFY, + CNAT_DEBUG_FLAG_UDP_OUTSIDE_CHECKSUM_MODIFY, + CNAT_DEBUG_FLAG_UDP_INSIDE_PACKET_DUMP, + CNAT_DEBUG_FLAG_UDP_OUTSIDE_PACKET_DUMP, +} cnat_debug_flag_type_t; + +typedef struct { + u16 spp_msg_id; + u8 rc; + u8 core; + u32 num_bytes; + u8 raw_data[0]; +} cnat_generic_command_resp; + +extern u32 db_free_entry (void * p); +#endif /*__CNAT_SHOW_RESPONSE_H__*/ diff --git a/vnet/vnet/vcgn/cnat_syslog.c b/vnet/vnet/vcgn/cnat_syslog.c new file mode 100644 index 00000000000..9c69d4a260d --- /dev/null +++ b/vnet/vnet/vcgn/cnat_syslog.c @@ -0,0 +1,1787 @@ +/* + *------------------------------------------------------------------ + * cnat_syslog.c + * + * Copyright (c) 2011-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <arpa/inet.h> +#include "cnat_syslog.h" +#include "platform_common.h" +#include "cnat_db.h" +#include "cnat_log_common.h" +#include <vppinfra/pool.h> + +#define SYSLOG_DELIMITER ' ' +#define SYSLOG_FIELD_ABSENT '-' +/* #define SHOW_SYSLOG_TIMESTAMP 1 TO DO. Remove this later */ +/* + * Defining the below macro here for now. Assumption is, syslog packets + * are sent out via same channel as that of NFV9. + * Has to be overridden if this assumption is false. + */ +#define PLATFORM_SYSLOG_DISP_NODE_IDX PLATFORM_NFV9_DISP_NODE_IDX + +cnat_syslog_global_info_t cnat_syslog_global_info; +cnat_syslog_logging_info_t *cnat_syslog_logging_info_pool; +cnat_syslog_global_counters_t cnat_syslog_global_counter; +extern u32 syslog_debug_flag; + +#define CNAT_SYSLOG_DEBUG_CODE 2 + +#if CNAT_SYSLOG_DEBUG_CODE > 3 +#define SYSLOG_COND if(my_instance_number == 0) + +#define SYSLOG_DEBUG_PRINTF1(a) SYSLOG_COND printf(a); +#define SYSLOG_DEBUG_PRINTF2(a, b) SYSLOG_COND printf(a, b); +#define SYSLOG_DEBUG_PRINTF3(a, b, c) SYSLOG_COND printf(a, b, c); +#define SYSLOG_DEBUG_PRINTF4(a, b, c, d) SYSLOG_COND printf(a, b, c, d); + +#else + +#define SYSLOG_DEBUG_PRINTF1(a) +#define SYSLOG_DEBUG_PRINTF2(a, b) +#define SYSLOG_DEBUG_PRINTF3(a, b, c) +#define SYSLOG_DEBUG_PRINTF4(a, b, c, d) + +#endif + + +void syslog_params_show(u32 logging_index) +{ + cnat_syslog_logging_info_t *log_info __attribute__((unused)); + if(logging_index == EMPTY) { + PLATFORM_DEBUG_PRINT("\nSyslog logging not configured\n"); + return; + } + + log_info = cnat_syslog_logging_info_pool + logging_index; + + PLATFORM_DEBUG_PRINT("\nSyslog parameters --\n"); + PLATFORM_DEBUG_PRINT("IPV4 address: %x, port %d, max log size %d\n", + log_info->ipv4_address, + log_info->port, log_info->max_length_minus_max_record_size); + PLATFORM_DEBUG_PRINT("Host name: %s, priority %d", + log_info->header_hostname, log_info->header_priority); + +} + +/* Util function to copy a number as ASCII in to a buf in a + * faster way (should be faster than sprintf) + */ + +const unsigned char ascii_numbers[][3] = + { {'0', '0', '0'}, + {'1', '0', '0'}, + {'2', '0', '0'}, + {'3', '0', '0'}, + {'4', '0', '0'}, + {'5', '0', '0'}, + {'6', '0', '0'}, + {'7', '0', '0'}, + {'8', '0', '0'}, + {'9', '0', '0'}, + {'1', '0', '0'}, + {'1', '1', '0'}, + {'1', '2', '0'}, + {'1', '3', '0'}, + {'1', '4', '0'}, + {'1', '5', '0'}, + {'1', '6', '0'}, + {'1', '7', '0'}, + {'1', '8', '0'}, + {'1', '9', '0'}, + {'2', '0', '0'}, + {'2', '1', '0'}, + {'2', '2', '0'}, + {'2', '3', '0'}, + {'2', '4', '0'}, + {'2', '5', '0'}, + {'2', '6', '0'}, + {'2', '7', '0'}, + {'2', '8', '0'}, + {'2', '9', '0'}, + {'3', '0', '0'}, + {'3', '1', '0'}, + {'3', '2', '0'}, + {'3', '3', '0'}, + {'3', '4', '0'}, + {'3', '5', '0'}, + {'3', '6', '0'}, + {'3', '7', '0'}, + {'3', '8', '0'}, + {'3', '9', '0'}, + {'4', '0', '0'}, + {'4', '1', '0'}, + {'4', '2', '0'}, + {'4', '3', '0'}, + {'4', '4', '0'}, + {'4', '5', '0'}, + {'4', '6', '0'}, + {'4', '7', '0'}, + {'4', '8', '0'}, + {'4', '9', '0'}, + {'5', '0', '0'}, + {'5', '1', '0'}, + {'5', '2', '0'}, + {'5', '3', '0'}, + {'5', '4', '0'}, + {'5', '5', '0'}, + {'5', '6', '0'}, + {'5', '7', '0'}, + {'5', '8', '0'}, + {'5', '9', '0'}, + {'6', '0', '0'}, + {'6', '1', '0'}, + {'6', '2', '0'}, + {'6', '3', '0'}, + {'6', '4', '0'}, + {'6', '5', '0'}, + {'6', '6', '0'}, + {'6', '7', '0'}, + {'6', '8', '0'}, + {'6', '9', '0'}, + {'7', '0', '0'}, + {'7', '1', '0'}, + {'7', '2', '0'}, + {'7', '3', '0'}, 
+ {'7', '4', '0'}, + {'7', '5', '0'}, + {'7', '6', '0'}, + {'7', '7', '0'}, + {'7', '8', '0'}, + {'7', '9', '0'}, + {'8', '0', '0'}, + {'8', '1', '0'}, + {'8', '2', '0'}, + {'8', '3', '0'}, + {'8', '4', '0'}, + {'8', '5', '0'}, + {'8', '6', '0'}, + {'8', '7', '0'}, + {'8', '8', '0'}, + {'8', '9', '0'}, + {'9', '0', '0'}, + {'9', '1', '0'}, + {'9', '2', '0'}, + {'9', '3', '0'}, + {'9', '4', '0'}, + {'9', '5', '0'}, + {'9', '6', '0'}, + {'9', '7', '0'}, + {'9', '8', '0'}, + {'9', '9', '0'}, + {'1', '0', '0'}, + {'1', '0', '1'}, + {'1', '0', '2'}, + {'1', '0', '3'}, + {'1', '0', '4'}, + {'1', '0', '5'}, + {'1', '0', '6'}, + {'1', '0', '7'}, + {'1', '0', '8'}, + {'1', '0', '9'}, + {'1', '1', '0'}, + {'1', '1', '1'}, + {'1', '1', '2'}, + {'1', '1', '3'}, + {'1', '1', '4'}, + {'1', '1', '5'}, + {'1', '1', '6'}, + {'1', '1', '7'}, + {'1', '1', '8'}, + {'1', '1', '9'}, + {'1', '2', '0'}, + {'1', '2', '1'}, + {'1', '2', '2'}, + {'1', '2', '3'}, + {'1', '2', '4'}, + {'1', '2', '5'}, + {'1', '2', '6'}, + {'1', '2', '7'}, + {'1', '2', '8'}, + {'1', '2', '9'}, + {'1', '3', '0'}, + {'1', '3', '1'}, + {'1', '3', '2'}, + {'1', '3', '3'}, + {'1', '3', '4'}, + {'1', '3', '5'}, + {'1', '3', '6'}, + {'1', '3', '7'}, + {'1', '3', '8'}, + {'1', '3', '9'}, + {'1', '4', '0'}, + {'1', '4', '1'}, + {'1', '4', '2'}, + {'1', '4', '3'}, + {'1', '4', '4'}, + {'1', '4', '5'}, + {'1', '4', '6'}, + {'1', '4', '7'}, + {'1', '4', '8'}, + {'1', '4', '9'}, + {'1', '5', '0'}, + {'1', '5', '1'}, + {'1', '5', '2'}, + {'1', '5', '3'}, + {'1', '5', '4'}, + {'1', '5', '5'}, + {'1', '5', '6'}, + {'1', '5', '7'}, + {'1', '5', '8'}, + {'1', '5', '9'}, + {'1', '6', '0'}, + {'1', '6', '1'}, + {'1', '6', '2'}, + {'1', '6', '3'}, + {'1', '6', '4'}, + {'1', '6', '5'}, + {'1', '6', '6'}, + {'1', '6', '7'}, + {'1', '6', '8'}, + {'1', '6', '9'}, + {'1', '7', '0'}, + {'1', '7', '1'}, + {'1', '7', '2'}, + {'1', '7', '3'}, + {'1', '7', '4'}, + {'1', '7', '5'}, + {'1', '7', '6'}, + {'1', '7', '7'}, + {'1', '7', '8'}, + {'1', '7', '9'}, + {'1', '8', '0'}, + {'1', '8', '1'}, + {'1', '8', '2'}, + {'1', '8', '3'}, + {'1', '8', '4'}, + {'1', '8', '5'}, + {'1', '8', '6'}, + {'1', '8', '7'}, + {'1', '8', '8'}, + {'1', '8', '9'}, + {'1', '9', '0'}, + {'1', '9', '1'}, + {'1', '9', '2'}, + {'1', '9', '3'}, + {'1', '9', '4'}, + {'1', '9', '5'}, + {'1', '9', '6'}, + {'1', '9', '7'}, + {'1', '9', '8'}, + {'1', '9', '9'}, + {'2', '0', '0'}, + {'2', '0', '1'}, + {'2', '0', '2'}, + {'2', '0', '3'}, + {'2', '0', '4'}, + {'2', '0', '5'}, + {'2', '0', '6'}, + {'2', '0', '7'}, + {'2', '0', '8'}, + {'2', '0', '9'}, + {'2', '1', '0'}, + {'2', '1', '1'}, + {'2', '1', '2'}, + {'2', '1', '3'}, + {'2', '1', '4'}, + {'2', '1', '5'}, + {'2', '1', '6'}, + {'2', '1', '7'}, + {'2', '1', '8'}, + {'2', '1', '9'}, + {'2', '2', '0'}, + {'2', '2', '1'}, + {'2', '2', '2'}, + {'2', '2', '3'}, + {'2', '2', '4'}, + {'2', '2', '5'}, + {'2', '2', '6'}, + {'2', '2', '7'}, + {'2', '2', '8'}, + {'2', '2', '9'}, + {'2', '3', '0'}, + {'2', '3', '1'}, + {'2', '3', '2'}, + {'2', '3', '3'}, + {'2', '3', '4'}, + {'2', '3', '5'}, + {'2', '3', '6'}, + {'2', '3', '7'}, + {'2', '3', '8'}, + {'2', '3', '9'}, + {'2', '4', '0'}, + {'2', '4', '1'}, + {'2', '4', '2'}, + {'2', '4', '3'}, + {'2', '4', '4'}, + {'2', '4', '5'}, + {'2', '4', '6'}, + {'2', '4', '7'}, + {'2', '4', '8'}, + {'2', '4', '9'}, + {'2', '5', '0'}, + {'2', '5', '1'}, + {'2', '5', '2'}, + {'2', '5', '3'}, + {'2', '5', '4'}, + {'2', '5', '5'} + }; + +inline static int +byte_to_ascii_decimal_unaligned( + unsigned char *ptr, unsigned char 
num) +{ + *ptr++ = ascii_numbers[num][0]; + if(PREDICT_FALSE(num < 10)) { + return 1; + } + *ptr++ = ascii_numbers[num][1]; + if(PREDICT_FALSE(num < 100)) { + return 2; + } + *ptr++ = ascii_numbers[num][2]; + return 3; +} + +/* Copies the dotted decimal format of ipv4 + * in to the space provided and + * returns the number of bytes copied + */ +inline static int +copy_ipv4_addr(unsigned char *ptr, u32 ipv4) +{ + unsigned char *temp = ptr; + temp += byte_to_ascii_decimal_unaligned(temp, (ipv4 >> 24)); + *temp++ = '.'; + temp += byte_to_ascii_decimal_unaligned(temp, ((ipv4 >> 16) & 0xFF)); + *temp++ = '.'; + temp += byte_to_ascii_decimal_unaligned(temp, ((ipv4 >> 8) & 0xFF)); + *temp++ = '.'; + temp += byte_to_ascii_decimal_unaligned(temp, (ipv4 & 0xFF)); + + return (temp - ptr); +} + +#ifdef TOBE_PORTED +/* + * edt: * * cnat_syslog_fill_ip_header + * + * Tries to fill the fields of the IP header before it + * is sent to the L3 infra node. + * + * Argument: cnat_syslog_logging_info_t *logging_info + * structure that contains the packet context + */ +inline +void cnat_syslog_fill_ip_header (cnat_syslog_logging_info_t *logging_info) +{ + spp_ctx_t *ctx; + + /* + * Fill in the IP header and port number of the Netflow collector + * The L3 Infra node will fill in the rest of the fields + */ + ctx = logging_info->current_logging_context; + fill_ip_n_udp_hdr(ctx, logging_info->ipv4_address, + logging_info->port, logging_info->pkt_length); + +} +#else +inline +void cnat_syslog_fill_ip_header (cnat_syslog_logging_info_t *logging_info) +{ + return; +} +#endif + +#ifndef TOBE_PORTED +void cnat_syslog_logging_init() +{ + return; +} + +void cnat_syslog_log_mapping_create(cnat_main_db_entry_t * db, + cnat_vrfmap_t *vrfmap) +{ + return; +} + +void cnat_syslog_log_mapping_delete(cnat_main_db_entry_t * db, + cnat_vrfmap_t *vrfmap) +{ + return; +} + +void cnat_syslog_ds_lite_port_limit_exceeded( + dslite_key_t * key, + dslite_table_entry_t *dslite_entry) +{ + return; +} + +void cnat_syslog_nat44_mapping_create(cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap, cnat_session_entry_t * sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + return; +} + +/* Following are in cnat_util.c which are not ported */ +/* This function is defined in cnat_util.c which need to be ported */ +cnat_icmp_msg_t icmp_msg_gen_allowed () +{ + return 1; +} + +void cnat_syslog_nat44_mapping_delete(cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap, cnat_session_entry_t *sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + return; +} + +u32 +cnat_get_unix_time_in_seconds (void) +{ + return 0; +} +#else /* TOBE_PORTED */ +void +cnat_syslog_dump_logging_context (u32 value1, + cnat_syslog_logging_info_t *logging_info, + u32 value2) +{ + u8 *pkt_ptr; + u32 i; + + if (PREDICT_TRUE(syslog_debug_flag == 0)) { + return; + } + /* + * Reduce the logging to few cores, to enable easier debugging + */ + if ((my_instance_number & 0x7) != 0) { + return; + } + printf("\nDumping %s packet at locn %d: time 0x%x", + (value2 == 1) ? 
"CURRENT" : "QUEUED", + value1, + cnat_get_unix_time_in_seconds()); + + printf("\ni_vrf 0x%x, ip_address 0x%x, port %d, pkt len %d", + 0 /* TO DP Add vrf like nfv9_logging_info->i_vrf */, + logging_info->ipv4_address, + logging_info->port, + logging_info->pkt_length); + printf("\n"); + + if (value2 == 1) { + pkt_ptr = logging_info->current_logging_context->packet_data; + } else { + pkt_ptr = logging_info->queued_logging_context->packet_data; + } + + /* + * Dump along with 8 bytes of SHIM header + */ + for (i = 0; i < + (logging_info->pkt_length + CNAT_NFV9_IP_HDR_OFFSET); + i = i + 1) { + u8 c1, c2, c3; + + if (i == 0) { + printf("\nL2_HEADER + SHIM_HEADER: \n"); + } else if (i == CNAT_NFV9_IP_HDR_OFFSET) { + printf("\nIP_HEADER: \n"); + } else if (i == CNAT_NFV9_UDP_HDR_OFFSET) { + printf("\nUDP_HEADER: \n"); + } else if (i == CNAT_NFV9_HDR_OFFSET) { + printf("\nSyslog content..\n"); + while(i < + (logging_info->pkt_length + CNAT_NFV9_HDR_OFFSET)) { + printf("%c", (u8)(*(pkt_ptr + i))); + i++; + if((u8)(*(pkt_ptr + i)) == '[') /* new record begins */ + printf("\n"); + } + return; + } + + c3 = *(pkt_ptr + i); + c2 = c3 & 0xf; + c1 = (c3 >> 4) & 0xf; + + printf("%c%c ", + ((c1 <= 9) ? (c1 + '0') : (c1 - 10 + 'a')), + ((c2 <= 9) ? (c2 + '0') : (c2 - 10 + 'a'))); + + } + + printf("\n"); +} + + +/* + * edt: * * cnat_syslog_send_pkt + * + * Tries to send a logging pkt. If the packet cannot be sent + * because of rewrite_output node cannot process it, queue + * it temporarily and try to send it later. + * + * Argument: cnat_syslog_logging_info_t *logging_info + * structure that contains the packet context + */ +inline +void cnat_syslog_send_pkt (cnat_syslog_logging_info_t *logging_info) +{ + spp_node_t *output_node; + + cnat_syslog_fill_ip_header(logging_info); + + output_node = spp_get_nodes() + + cnat_syslog_global_info.cnat_syslog_disp_node_index; + + cnat_syslog_dump_logging_context (2, logging_info, 1); + + if (PREDICT_TRUE(output_node->sf.nused < SPP_MAXDISPATCH)) { + /* + * Move the logging context to output node + */ + logging_info->current_logging_context->current_length = + logging_info->pkt_length; + PLATFORM_SET_CTX_RU_TX_FROM_NODE(logging_info->current_logging_context, \ + NODE_LOGGING); + spp_dispatch_make_node_runnable(output_node); + output_node->sf.ctxs[output_node->sf.nused++] = + logging_info->current_logging_context; + + if(PREDICT_FALSE(syslog_debug_flag > 10)) + printf("\nSyslog: 2. Sending Current packet\n"); + } else { + /* + * Queue the context into the logging_info structure, + * We will try to send it later. Currently, we will + * restrict to only one context queued. + */ + cnat_syslog_global_counter.downstream_constipation_count++; + if(PREDICT_FALSE(syslog_debug_flag > 10)) + printf("\nSyslog: 2. Downstream congestion \n"); + + /* + * Attach the current logging context which is full to the + * queued context list in logging_info structure + */ + logging_info->queued_logging_context = + logging_info->current_logging_context; + } + + /* + * Whether the context is queued or not, set the current context index + * to EMPTY, as the earlier context can no more be used to send + * more logging records. 
+ */ + logging_info->current_logging_context = NULL; +} + + +/* + * edt: * * cnat_syslog_send_queued_pkt + * + * Tries to send a logging pkt that has been queued earlier + * because it could not be sent due to downstream constipation + * + * Argument: cnat_syslog_logging_info_t *logging_info + * structure that contains the packet context + */ +inline +void cnat_syslog_send_queued_pkt (cnat_syslog_logging_info_t *logging_info) +{ + spp_node_t *output_node; + + output_node = spp_get_nodes() + + cnat_syslog_global_info.cnat_syslog_disp_node_index; + + cnat_syslog_dump_logging_context(1, logging_info, 2); + + if(PREDICT_TRUE(output_node->sf.nused < SPP_MAXDISPATCH)) { + /* + * Move the logging context to output node + */ + /** This looks like a bug to me .. need to confirm ***** + logging_info->queued_logging_context->current_length = + nfv9_logging_info->pkt_length; ***/ + PLATFORM_SET_CTX_RU_TX_FROM_NODE(logging_info->queued_logging_context, + NODE_LOGGING) + spp_dispatch_make_node_runnable(output_node); + output_node->sf.ctxs[output_node->sf.nused++] = + logging_info->queued_logging_context; + + SYSLOG_DEBUG_PRINTF1("\nSYSLOG: 1. Sending Queued packet\n") + + /* + * Context has been queued, it will be freed after the pkt + * is sent. Clear this from the logging_context_info structure + */ + logging_info->queued_logging_context = NULL; + + } else { + cnat_syslog_global_counter.downstream_constipation_count++; + } +} + +/* + * edt: * * handle_pending_syslog_pkts + * + * Timer handler for sending any pending syslog record + * + */ +inline +void handle_pending_syslog_pkts() +{ + spp_node_t *output_node; + cnat_syslog_logging_info_t *my_logging_info = 0; + u32 current_timestamp = cnat_get_sys_up_time_in_ms(); + i16 sf_nused; + + output_node = spp_get_nodes() + + cnat_syslog_global_info.cnat_syslog_disp_node_index; + + sf_nused = output_node->sf.nused; + + pool_foreach (my_logging_info, cnat_syslog_logging_info_pool, ({ + /* + * Check if no more logging contexts can be queued + */ + if (PREDICT_FALSE(sf_nused >= SPP_MAXDISPATCH)) { + break; + } + if (my_logging_info->queued_logging_context) + cnat_syslog_send_queued_pkt (my_logging_info); + + if(my_logging_info->current_logging_context && + ((current_timestamp - + my_logging_info->current_logging_context_timestamp) + > 1000)) { + /* + * If there is a current logging context and timestamp + * indicates it is pending for long, send it out + * Also if there is a queued context send it out as well + */ + SYSLOG_DEBUG_PRINTF4("\nLOG_TIMER: queued %p, curr %p, sf_nused %d", + my_logging_info->queued_logging_context, + my_logging_info->current_logging_context, + sf_nused); + cnat_syslog_send_pkt(my_logging_info); + } + })); +} + +const unsigned char hex_numbers_single_digit[] = + { '0', '1', '2', '3', '4', '5', '6', '7', '8', + '9', 'a', 'b', 'c', 'd', 'e', 'f' }; + +inline static int u16_to_ascii_decimal_aligned( + unsigned char *ptr, u16 num, u16 min_digits) +{ + /* The logic below is replicated in + * function u16_to_ascii_decimal_unaligned + * except the use of min_digits + * Replication is done to optimize run time + * if you fix a bug here, check u16_to_ascii_decimal_unaligned + * as well (and vice versa) + */ + unsigned char *temp = ptr; + int no_leading_zeros = 0; + + if(num > 9999 || min_digits == 5) { + *temp++ = hex_numbers_single_digit[num/10000]; + num = num%10000; + no_leading_zeros = 1; + } + + if(no_leading_zeros || num > 999 || min_digits == 4) { + *temp++ = hex_numbers_single_digit[num/1000]; + num = num%1000; + no_leading_zeros = 1; + 
} + + if(no_leading_zeros || num > 99 || min_digits == 3) { + *temp++ = hex_numbers_single_digit[num/100]; + num = num%100; + no_leading_zeros = 1; + } + + if(no_leading_zeros || num > 9 || min_digits == 2) { + *temp++ = hex_numbers_single_digit[num/10]; + num = num%10; + } + + *temp++ = hex_numbers_single_digit[num]; + + return temp-ptr; +} + +inline static int u16_to_ascii_decimal_unaligned( + unsigned char *ptr, u16 num) +{ + /* + * return u16_to_ascii_decimal_aligned(ptr, num, 0); + * should do the job.. however, to opimize the run time + * the code of u16_to_ascii_decimal_aligned is being + * repeated here without the use of min_digits + * if you fix a bug here, please check + * u16_to_ascii_decimal_aligned as well (and vice versa) + */ + unsigned char *temp = ptr; + int no_leading_zeros = 0; + + if(num > 9999) { + *temp++ = hex_numbers_single_digit[num/10000]; + num = num%10000; + no_leading_zeros = 1; + } + + if(no_leading_zeros || num > 999) { + *temp++ = hex_numbers_single_digit[num/1000]; + num = num%1000; + no_leading_zeros = 1; + } + + if(no_leading_zeros || num > 99) { + *temp++ = hex_numbers_single_digit[num/100]; + num = num%100; + no_leading_zeros = 1; + } + + if(no_leading_zeros || num > 9) { + *temp++ = hex_numbers_single_digit[num/10]; + num = num%10; + } + + *temp++ = hex_numbers_single_digit[num]; + + return temp-ptr; +} + +static int syslog_get_timestamp(unsigned char *ts) +{ + static const char *months[] = {"Jan ", "Feb ", "Mar ", "Apr ", "May ", + "Jun ", "Jul ", "Aug ", "Sep ", "Oct ", "Nov ", "Dec " }; + + unsigned char *temp = ts; + /* Inserts time stamp in the syslog format and returns lenght + * assumes that ts has sufficient space + */ + /* China Telecom has demanded that the time stamp has to be + * in the format '2011 Jun 7 12:34:08' + */ + time_t time = (time_t)cnat_get_unix_time_in_seconds(); + struct tm tm1; + + gmtime_r(&time, &tm1); + /* Now put the pieces together */ + /* Year */ + ts += u16_to_ascii_decimal_unaligned(ts, (tm1.tm_year + 1900)); + *ts++ = SYSLOG_DELIMITER; + /* Month */ + memcpy(ts, months[tm1.tm_mon], 4); + ts += 4; /* DELIMITER taken care */ + /* day */ + ts += u16_to_ascii_decimal_unaligned(ts, tm1.tm_mday); + *ts++ = SYSLOG_DELIMITER; + /* hours */ + ts += u16_to_ascii_decimal_aligned(ts, tm1.tm_hour, 2); + *ts++ = ':'; + /* minutes */ + ts += u16_to_ascii_decimal_aligned(ts, tm1.tm_min, 2); + *ts++ = ':'; + /* seconds */ + ts += u16_to_ascii_decimal_aligned(ts, tm1.tm_sec, 2); + return ts - temp; +} + +/* Ensure that the order of the below array matches with + * syslog_service_type enum + */ +static char *syslog_service_string[] = { "NAT44", "DSLITE" }; + +/* Ensure that the order of below array matches with + * syslog_event_type_t enum + */ +typedef struct { + char *event_name; + int name_length; +} syslog_event_description_type; + +const static syslog_event_description_type sys_log_event[] = { + { "UserbasedA", 10 }, /* yes, 10 is strlen of "UserbasedA" */ + { "UserbasedW", 10 }, + { "SessionbasedA", 13 }, + { "SessionbasedW", 13 }, + { "SessionbasedAD", 14 }, + { "SessionbasedWD", 14 }, + { "Portblockrunout", 15 }, + { "TCPseqmismatch", 14}, + { "Invalid", 7 } +}; + +inline static int syslog_fill_header(const cnat_syslog_logging_info_t *log_info, + syslog_service_type_t s_type) +{ + /* Forms the syslog header and returns the lenght + * Assumes that header has sufficient space + */ + + /* Sample header (as agreed for China Telecom requirements -- + * <134> 1 2011 May 31 10:30:45 192.168.2.3 - - NAT44 - + */ + + unsigned char 
*temp, *header; + int count; + temp = header = (unsigned char *) + &(log_info->current_logging_context->packet_data[CNAT_NFV9_HDR_OFFSET]); + *temp++ = '<'; + temp += byte_to_ascii_decimal_unaligned(temp, + log_info->header_priority); + *temp++ = '>'; + *temp++ = SYSLOG_DELIMITER; + *temp++ = '1'; /* Syslog version -- always set to 1 */ + *temp++ = SYSLOG_DELIMITER; + temp += syslog_get_timestamp(temp); + *temp++ = SYSLOG_DELIMITER; + count = strlen(log_info->header_hostname); + memcpy(temp, log_info->header_hostname, count); + temp += count; + *temp++ = SYSLOG_DELIMITER; + *temp++ = SYSLOG_FIELD_ABSENT; /* App name - nil value */ + *temp++ = SYSLOG_DELIMITER; + *temp++ = SYSLOG_FIELD_ABSENT; /* Proc ID - nil value for now */ + *temp++ = SYSLOG_DELIMITER; + /* Now the msg id */ + count = strlen(syslog_service_string[s_type]); + memcpy(temp, syslog_service_string[s_type], count); + temp += count; + *temp++ = SYSLOG_DELIMITER; + *temp++ = SYSLOG_FIELD_ABSENT; /* No structured elements */ + *temp++ = SYSLOG_DELIMITER; +#ifdef SHOW_SYSLOG_TIMESTAMP + printf("\nSysLog TS: %s : Length %d", header, temp - header); +#endif /* SHOW_SYSLOG_TIMESTAMP */ + return temp-header; +} + +extern void cnat_logging_init(); + +/* one time call at the beginning */ +void cnat_syslog_logging_init() +{ + if(PREDICT_TRUE(cnat_syslog_global_info.cnat_syslog_init_done)) + return; /* Already done */ + + cnat_logging_init(); + cnat_syslog_global_info.cnat_syslog_disp_node_index = + spp_lookup_node_index(PLATFORM_SYSLOG_DISP_NODE_IDX); + ASSERT(cnat_syslog_global_info.cnat_syslog_disp_node_index != (u16)~0); + + cnat_syslog_global_info.cnat_syslog_init_done = 1; +} + +/* + * edt: * * cnat_syslog_create_logging_context + * + * Tries to create a logging context with packet buffer + * to send a new logging packet + * + * Argument: cnat_syslog_logging_info_t *logging_info + * structure that contains the logging info and will store + * the packet context as well. + */ +inline +void cnat_syslog_create_logging_context ( + cnat_syslog_logging_info_t *logging_info, + syslog_service_type_t s_type) +{ + spp_ctx_t *ctx; + + /* + * If queued_logging_context_index is non-EMPTY, we already have a logging + * packet queued to be sent. First try sending this before allocating + * a new context. 
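+ * (A sketch of the flow, as implemented above: a context ends up in
+ * queued_logging_context only when the output/dispatch node was full,
+ * i.e. sf.nused had reached SPP_MAXDISPATCH in cnat_syslog_send_pkt();
+ * it is then retried from here and from the handle_pending_syslog_pkts()
+ * timer, and is freed once it has actually been handed to the output
+ * node.)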
We can have only one active packet context per + * logging_info structure + */ + + if (PREDICT_FALSE(logging_info->queued_logging_context != NULL)) { + cnat_syslog_send_queued_pkt(logging_info); + /* + * If we cannot still send the queued pkt, just return + * Downstream Constipation count would have increased anyway + */ + if (logging_info->queued_logging_context != NULL) { + cnat_syslog_global_counter.logging_context_creation_deferred_count++; + return; + } + } + + /* + * If no context can be allocated, return silently + * calling routine will handle updating the error counters + */ + if (spp_ctx_alloc(&ctx, 1) < 1) { + cnat_syslog_global_counter.logging_context_creation_fail_count++; + SYSLOG_DEBUG_PRINTF1("\nCould not allocate ctx for syslog"); + return; + } + + // Allocate packet buffer (used for AVSM currently) + PLATFORM_ALLOC_NFV9_PKT_BUFFER(ctx, 0); + + logging_info->current_logging_context = ctx; + + PLATFORM_SET_CTX_RU_TX_FROM_NODE(ctx, NODE_LOGGING); + + ctx->flags = SPP_CTX_END_OF_PACKET; + ctx->next_ctx_this_packet = (spp_ctx_t*) SPP_CTX_NO_NEXT_CTX; + ctx->current_header = &ctx->packet_data[CNAT_NFV9_HDR_OFFSET]; + + logging_info->pkt_length = syslog_fill_header(logging_info, s_type); + logging_info->pkt_length += (CNAT_NFV9_HDR_OFFSET - + CNAT_NFV9_IP_HDR_OFFSET); + logging_info->current_logging_context_timestamp = + cnat_get_sys_up_time_in_ms(); + +} + +inline static int u16_to_ascii_hex_unaligned( + unsigned char *ptr, u16 num) +{ + unsigned char nibble, *temp; + int no_leading_zeros = 0; + temp = ptr; + nibble = (num >> 12); + if(nibble) { + *temp++ = hex_numbers_single_digit[nibble]; + no_leading_zeros = 1; + } + + nibble = (num >> 8) & 0xF; + if(nibble || no_leading_zeros) { + *temp++ = hex_numbers_single_digit[nibble]; + no_leading_zeros = 1; + } + + nibble = (num >> 4) & 0xF; + if(nibble || no_leading_zeros) { + *temp++ = hex_numbers_single_digit[nibble]; + } + + *temp++ = hex_numbers_single_digit[num & 0xF]; + + return temp-ptr; +} + +inline static int ipv6_int_2_str(u32 ipv6[], unsigned char *ipv6_str) +{ +/* DC stands for Double Colon. 
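+ * For example, under these rules 2001:0db8:0:0:0:0:0:1 is emitted as
+ * "2001:db8::1": leading zeros within a group are dropped, and the first
+ * run of two or more all-zero groups is compressed to "::"; any later
+ * zero groups are written out in full.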
+ * Refer http://tools.ietf.org/html/rfc5952 for + * more details on text representations of + * IPV6 address + */ +#define DC_NOT_USED_YET 0 +#define DC_IN_USE 1 /* Zeros are skipped */ +#define DC_ALREADY_USED 2 /* Cannot skip zeros anymore */ + int i; + u16 *ipv6_temp = (u16 *)ipv6; + unsigned char *temp = ipv6_str; + int double_colon = DC_NOT_USED_YET; + for(i = 0; i < 7; i++) { + if(ipv6_temp[i]) { + ipv6_str += u16_to_ascii_hex_unaligned(ipv6_str, ipv6_temp[i]); + *ipv6_str++ = ':'; + if(double_colon == DC_IN_USE) { /* Cannot use DC anymore */ + double_colon = DC_ALREADY_USED; + } + } else { + if(double_colon == DC_IN_USE) { + /* Skip this zero as well */ + continue; + } else if((ipv6_temp[i+1]) + /* DC makes sense if there is more than one contiguous zero */ + || (double_colon != DC_NOT_USED_YET)) { + ipv6_str += u16_to_ascii_hex_unaligned(ipv6_str, + ipv6_temp[i]); + *ipv6_str++ = ':'; + } else { /* Start using DC */ + *ipv6_str++ = ':'; /* The 2nd colon */ + double_colon = DC_IN_USE; + } + } + } + if(ipv6_temp[7]) { + ipv6_str += u16_to_ascii_hex_unaligned(ipv6_str, ipv6_temp[7]); + } else if(double_colon != DC_IN_USE) { + *ipv6_str++ = '0'; + } + *ipv6_str = 0; + + return ipv6_str - temp; +} + +/* insert syslog record for nat44 */ + +void cnat_syslog_insert_nat44_record( + cnat_syslog_logging_info_t *log_info, + cnat_main_db_entry_t *db, cnat_vrfmap_t *vrfmap, + cnat_session_entry_t *sdb, int bulk_alloc, syslog_event_type_t e_type) +{ + /* This record should like this - + * [EventName <L4> <Original Source IP> <Inside VRF Name> + * <Original Source IPv6> < Translated Source IP> <Original Port> + * <Translated First Source Port> <Translated Last Source Port> + * <Destination ip address> <destination port>] + */ + u32 original_source = db->in2out_key.k.ipv4; + u32 translated_ip = db->out2in_key.k.ipv4; + cnat_user_db_entry_t *udb = cnat_user_db + db->user_index; + unsigned char *temp, *record; + u32 network_order_ipv6[4]; + + SYSLOG_CONFIG_DEBUG_PRINTF(4,"In Function %s\n", __func__); + temp = record = &(log_info->current_logging_context->packet_data[ + CNAT_NFV9_IP_HDR_OFFSET + log_info->pkt_length]); + + if (PREDICT_FALSE(!udb)) { + SYSLOG_DEBUG_PRINTF1("\nnull udb!"); + return; + } + + /* Now we point to the location where record needs to be inserted */ + *record++ = '['; /* Open the record */ + + /* Copy the record type */ + memcpy(record, sys_log_event[e_type].event_name, + sys_log_event[e_type].name_length); + record += sys_log_event[e_type].name_length; + *record++ = SYSLOG_DELIMITER; + + /* Copy the Protocol type */ + if(PREDICT_FALSE( + e_type == sessionbased_assign || e_type == sessionbased_withdraw || + e_type == sessionbased_assignD || e_type == sessionbased_withdrawD)) { + u16 my_proto_mask; + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + if(PREDICT_TRUE(my_proto_mask == CNAT_TCP)) { + *record++ = '6'; + } else if(PREDICT_TRUE(my_proto_mask == CNAT_UDP)) { + *record++ = '1'; + *record++ = '7'; + } else if(PREDICT_TRUE(my_proto_mask == CNAT_ICMP)) { + *record++ = '1'; + } else { /* Default, assume GRE (for PPTP) */ + *record++ = '4'; + *record++ = '7'; + } + } else { + *record++ = SYSLOG_FIELD_ABSENT; + } + *record++ = SYSLOG_DELIMITER; + + /* Copy the Original Source IP */ + record += copy_ipv4_addr(record, original_source); + *record++ = SYSLOG_DELIMITER; + + /* copy configured VRF NAME */ + memcpy(record, log_info->vrf_name, log_info->vrf_name_len); + record += log_info->vrf_name_len; + *record++ = SYSLOG_DELIMITER; + + /* No IPV6 source address for nat44 
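+ * (the field is emitted below as the SYSLOG_FIELD_ABSENT nil value, '-')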
*/ + *record++ = SYSLOG_FIELD_ABSENT; + *record++ = SYSLOG_DELIMITER; + + /* Copy the translated IP address */ + record += copy_ipv4_addr(record, translated_ip); + *record++ = SYSLOG_DELIMITER; + + /* Copy the Original port */ + if(e_type == sessionbased_assign || e_type == sessionbased_withdraw || + e_type == sessionbased_assignD || e_type == sessionbased_withdrawD) { + record += u16_to_ascii_decimal_unaligned( + record, db->in2out_key.k.port); + } else { + *record++ = SYSLOG_FIELD_ABSENT; + } + *record++ = SYSLOG_DELIMITER; + + /* Copy the start outside port */ + record += u16_to_ascii_decimal_unaligned(record, bulk_alloc); + *record++ = SYSLOG_DELIMITER; + + /* Copy the last outside port */ + if(e_type == userbased_assign || e_type == userbased_withdraw) { + record += u16_to_ascii_decimal_unaligned(record, + (bulk_alloc + BULKSIZE_FROM_VRFMAP(vrfmap) - 1)); + } else { + *record++ = SYSLOG_FIELD_ABSENT; + } + *record++ = SYSLOG_DELIMITER; + + /* Copy destination ip and port in case for DBL*/ + if(PREDICT_FALSE(e_type == sessionbased_assignD || e_type == sessionbased_withdrawD)) { + if(PREDICT_TRUE(sdb == NULL)) { + record += copy_ipv4_addr(record,db->dst_ipv4); + *record++ = SYSLOG_DELIMITER; + record += u16_to_ascii_decimal_unaligned(record, db->dst_port); + } else { + record += copy_ipv4_addr(record, sdb->v4_dest_key.k.ipv4); + *record++ = SYSLOG_DELIMITER; + record += u16_to_ascii_decimal_unaligned(record, sdb->v4_dest_key.k.port); + } + } else { + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + *record++ = '-'; + } + *record++ = SYSLOG_DELIMITER; + + *record++ = ']'; /* End of the reocrd */ + + log_info->pkt_length += record - temp; +} + +void cnat_syslog_insert_record( + cnat_syslog_logging_info_t *log_info, + cnat_main_db_entry_t *db, dslite_table_entry_t *dslite_entry, + cnat_session_entry_t *sdb, int bulk_alloc, syslog_event_type_t e_type) +{ + /* This record should like this - + * [EventName <L4> <Original Source IP> <Inside VRF Name> + * <Original Source IPv6> < Translated Source IP> <Original Port> + * <Translated First Source Port> <Translated Last Source Port> + * <Destination ip address> <destination port>] + */ + u32 original_source = db->in2out_key.k.ipv4; + u32 translated_ip = db->out2in_key.k.ipv4; + cnat_user_db_entry_t *udb = cnat_user_db + db->user_index; + unsigned char *temp, *record; + u32 network_order_ipv6[4]; + + temp = record = &(log_info->current_logging_context->packet_data[ + CNAT_NFV9_IP_HDR_OFFSET + log_info->pkt_length]); + + if (PREDICT_FALSE(!udb)) { + SYSLOG_DEBUG_PRINTF1("\nnull udb!"); + return; + } + + /* Now we point to the location where record needs to be inserted */ + *record++ = '['; /* Open the record */ + + /* Copy the record type */ + memcpy(record, sys_log_event[e_type].event_name, + sys_log_event[e_type].name_length); + record += sys_log_event[e_type].name_length; + *record++ = SYSLOG_DELIMITER; + + /* Copy the Protocol type */ + if(PREDICT_FALSE( + e_type == sessionbased_assign || e_type == sessionbased_withdraw || + e_type == sessionbased_assignD || e_type == sessionbased_withdrawD)) { + u16 my_proto_mask; + my_proto_mask = db->in2out_key.k.vrf & CNAT_PRO_MASK; + if(PREDICT_TRUE(my_proto_mask == CNAT_TCP)) { + *record++ = '6'; + } else if(PREDICT_TRUE(my_proto_mask == CNAT_UDP)) { + *record++ = '1'; + *record++ = '7'; + } else { + *record++ = '1'; + } + } else { + *record++ = SYSLOG_FIELD_ABSENT; + } + + *record++ = SYSLOG_DELIMITER; + + /* Copy the Original Source IP */ +#ifdef DSLITE_USER_IPV4 + record += copy_ipv4_addr(record, 
original_source); +#else + /* + * Do not include inside ipv4 address for B4 element level port limiting + */ + *record++ = SYSLOG_FIELD_ABSENT; +#endif + *record++ = SYSLOG_DELIMITER; + + /* copy configured VRF NAME */ + memcpy(record, log_info->vrf_name, log_info->vrf_name_len); + record += log_info->vrf_name_len; + *record++ = SYSLOG_DELIMITER; + + /* Copy the IPV6 source address */ + /* CSCtt16960 Fix. */ + network_order_ipv6[0] = htonl(udb->ipv6[0]); + network_order_ipv6[1] = htonl(udb->ipv6[1]); + network_order_ipv6[2] = htonl(udb->ipv6[2]); + network_order_ipv6[3] = htonl(udb->ipv6[3]); + + inet_ntop(AF_INET6,network_order_ipv6,record,INET6_ADDRSTRLEN); + record += strlen(record); + *record++ = SYSLOG_DELIMITER; + + /* Copy the translated IP address */ + record += copy_ipv4_addr(record, translated_ip); + *record++ = SYSLOG_DELIMITER; + + /* Copy the Original port */ + if(e_type == sessionbased_assign || e_type == sessionbased_withdraw || + e_type == sessionbased_assignD || e_type == sessionbased_withdrawD) { + record += u16_to_ascii_decimal_unaligned( + record, db->in2out_key.k.port); + } else { + *record++ = SYSLOG_FIELD_ABSENT; + } + *record++ = SYSLOG_DELIMITER; + + /* Copy the start outside port */ + record += u16_to_ascii_decimal_unaligned(record, bulk_alloc); + *record++ = SYSLOG_DELIMITER; + + /* Copy the last outside port */ + if(e_type == userbased_assign || e_type == userbased_withdraw) { + record += u16_to_ascii_decimal_unaligned(record, + (bulk_alloc + BULKSIZE_FROM_VRFMAP(dslite_entry) - 1)); + } else { + *record++ = SYSLOG_FIELD_ABSENT; + } + *record++ = SYSLOG_DELIMITER; + + if(PREDICT_FALSE(e_type == sessionbased_assignD || e_type == sessionbased_withdrawD)) { + if(sdb == NULL) { + record += copy_ipv4_addr(record, db->dst_ipv4); + *record++ = SYSLOG_DELIMITER; + record += u16_to_ascii_decimal_unaligned(record, db->dst_port); + } else { + record += copy_ipv4_addr(record, sdb->v4_dest_key.k.ipv4); + *record++ = SYSLOG_DELIMITER; + record += u16_to_ascii_decimal_unaligned(record, sdb->v4_dest_key.k.port); + } + } else { + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + *record++ = '-'; + } + *record++ = SYSLOG_DELIMITER; + + *record++ = ']'; /* End of the reocrd */ + + log_info->pkt_length += record - temp; +} + +#define SYSLOG_PRECHECK(entry, s_type) \ + if(PREDICT_FALSE((entry)->syslog_logging_index == EMPTY)) { \ + SYSLOG_DEBUG_PRINTF1("\n1. Log Mapping failed") \ + return; \ + } \ + logging_info = \ + cnat_syslog_logging_info_pool + (entry)->syslog_logging_index; \ + if(PREDICT_FALSE(logging_info->current_logging_context == NULL)) { \ + cnat_syslog_create_logging_context(logging_info, s_type); \ + if(PREDICT_FALSE(logging_info->current_logging_context == NULL)) { \ + SYSLOG_DEBUG_PRINTF1("\n2. Log Mapping failed") \ + return; \ + } \ + } + +void cnat_syslog_nat44_mapping_create(cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap, cnat_session_entry_t * sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + cnat_syslog_logging_info_t *logging_info = 0; + syslog_event_type_t e_type; + int start_port; + + SYSLOG_CONFIG_DEBUG_PRINTF(4,"In Function %s\n", __func__); + SYSLOG_PRECHECK(vrfmap, NAT44) + +#ifndef NO_BULK_LOGGING + if(bulk_alloc > 0) { /* new bulk alloc - use bulk add template */ + e_type = userbased_assign; + start_port = bulk_alloc; + } else if(bulk_alloc == CACHE_ALLOC_NO_LOG_REQUIRED) { + return; /* No logging required.. bulk port usage */ + } + else { /* Individual logging .. 
fall back to old method */ +#endif + if(vrfmap->syslog_logging_policy == SESSION_LOG_ENABLE) { + e_type = sessionbased_assignD; + } else { + e_type = sessionbased_assign; + } + start_port = db->out2in_key.k.port; +#ifndef NO_BULK_LOGGING + } +#endif + + cnat_syslog_insert_nat44_record(logging_info, db, vrfmap, sdb, + start_port, e_type); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + + if (PREDICT_FALSE(logging_info->pkt_length > + logging_info->max_length_minus_max_record_size)) { + cnat_syslog_send_pkt(logging_info); + } +} + +void cnat_syslog_ds_lite_mapping_create(cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, cnat_session_entry_t *sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + cnat_syslog_logging_info_t *logging_info = 0; + syslog_event_type_t e_type; + int start_port; + + SYSLOG_PRECHECK(dslite_entry, DSLite) + +#ifndef NO_BULK_LOGGING + if(bulk_alloc > 0) { /* new bulk alloc - use bulk add template */ + e_type = userbased_assign; + start_port = bulk_alloc; + } else if(bulk_alloc == CACHE_ALLOC_NO_LOG_REQUIRED) { + return; /* No logging required.. bulk port usage */ + } + else { /* Individual logging .. fall back to old method */ +#endif + if(PREDICT_FALSE(dslite_entry->syslog_logging_policy == SESSION_LOG_ENABLE)) { + e_type = sessionbased_assignD; + } else { + e_type = sessionbased_assign; + } + start_port = db->out2in_key.k.port; +#ifndef NO_BULK_LOGGING + } +#endif + + cnat_syslog_insert_record(logging_info, db, dslite_entry, sdb, + start_port, e_type); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + + if (PREDICT_FALSE(logging_info->pkt_length > + logging_info->max_length_minus_max_record_size)) { + cnat_syslog_send_pkt(logging_info); + } +} + +void cnat_syslog_nat44_mapping_delete(cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap, cnat_session_entry_t *sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + cnat_syslog_logging_info_t *logging_info = 0; + syslog_event_type_t e_type; + int start_port; + + SYSLOG_CONFIG_DEBUG_PRINTF(4,"In Function %s\n", __func__); + SYSLOG_PRECHECK(vrfmap, NAT44) + +#ifndef NO_BULK_LOGGING + if(bulk_alloc > 0) { /* new bulk alloc - use bulk add template */ + e_type = userbased_withdraw; + start_port = bulk_alloc; + } else if(bulk_alloc == CACHE_ALLOC_NO_LOG_REQUIRED) { + return; /* No logging required.. bulk port usage */ + } + else { /* Individual logging .. fall back to old method */ +#endif + if(vrfmap->syslog_logging_policy == SESSION_LOG_ENABLE) { + e_type = sessionbased_withdrawD; + } else { + e_type = sessionbased_withdraw; + } + start_port = db->out2in_key.k.port; +#ifndef NO_BULK_LOGGING + } +#endif + cnat_syslog_insert_nat44_record(logging_info, db, vrfmap, sdb, + start_port, e_type); + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. 
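+ * (As the field name suggests, max_length_minus_max_record_size is
+ * presumably the configured maximum packet size less
+ * MAX_SYSLOG_RECORD_LEN, so the record just written still fits even
+ * when this check then triggers a send.)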
+ */ + if (PREDICT_FALSE(logging_info->pkt_length > + logging_info->max_length_minus_max_record_size)) { + cnat_syslog_send_pkt(logging_info); + } +} + +void cnat_syslog_ds_lite_mapping_delete(cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, cnat_session_entry_t *sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ) +{ + cnat_syslog_logging_info_t *logging_info = 0; + syslog_event_type_t e_type; + int start_port; + + SYSLOG_PRECHECK(dslite_entry, DSLite) + +#ifndef NO_BULK_LOGGING + if(bulk_alloc > 0) { /* new bulk alloc - use bulk add template */ + e_type = userbased_withdraw; + start_port = bulk_alloc; + } else if(bulk_alloc == CACHE_ALLOC_NO_LOG_REQUIRED) { + return; /* No logging required.. bulk port usage */ + } + else { /* Individual logging .. fall back to old method */ +#endif + if(PREDICT_FALSE(dslite_entry->syslog_logging_policy == SESSION_LOG_ENABLE)) { + e_type = sessionbased_withdrawD; + } else { + e_type = sessionbased_withdraw; + } + start_port = db->out2in_key.k.port; +#ifndef NO_BULK_LOGGING + } +#endif + cnat_syslog_insert_record(logging_info, db, dslite_entry, sdb, + start_port, e_type); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + + if (PREDICT_FALSE(logging_info->pkt_length > + logging_info->max_length_minus_max_record_size)) { + cnat_syslog_send_pkt(logging_info); + } +} + +void cnat_syslog_dslite_insert_port_exceeded( + cnat_syslog_logging_info_t *log_info, + dslite_key_t * key) +{ + /* This record should like this - + * [Portblockrunout <L4> <Original Source IP> <Inside VRF Name> + * <Original Source IPv6> - <Original Port> - - - -] + */ + u32 network_order_ipv6[4]; + unsigned char *temp, *record; + + temp = record = &(log_info->current_logging_context->packet_data[ + CNAT_NFV9_IP_HDR_OFFSET + log_info->pkt_length]); + + /* Now we point to the location where record needs to be inserted */ + *record++ = '['; /* Open the record */ + + /* Copy the record type */ + memcpy(record, sys_log_event[port_block_runout].event_name, + sys_log_event[port_block_runout].name_length); + record += sys_log_event[port_block_runout].name_length; + *record++ = SYSLOG_DELIMITER; + + u16 my_proto_mask; + my_proto_mask = key->ipv4_key.k.vrf & CNAT_PRO_MASK; + if(PREDICT_TRUE(my_proto_mask == CNAT_TCP)) { + *record++ = '6'; + } else if(PREDICT_TRUE(my_proto_mask == CNAT_UDP)) { + *record++ = '1'; + *record++ = '7'; + } else { + *record++ = '1'; + } + *record++ = SYSLOG_DELIMITER; + + /* Copy the Original Source IP */ + record += copy_ipv4_addr(record, key->ipv4_key.k.ipv4); + *record++ = SYSLOG_DELIMITER; + + /* copy configured VRF NAME */ + memcpy(record, log_info->vrf_name, log_info->vrf_name_len); + record += log_info->vrf_name_len; + *record++ = SYSLOG_DELIMITER; + + /* Copy the IPV6 source address */ + network_order_ipv6[0] = htonl(key->ipv6[0]); + network_order_ipv6[1] = htonl(key->ipv6[1]); + network_order_ipv6[2] = htonl(key->ipv6[2]); + network_order_ipv6[3] = htonl(key->ipv6[3]); + + inet_ntop(AF_INET6,network_order_ipv6,record,INET6_ADDRSTRLEN); + record += strlen(record); + *record++ = SYSLOG_DELIMITER; + + *record++ = SYSLOG_FIELD_ABSENT; /* No translated source ip */ + *record++ = SYSLOG_DELIMITER; + + record += u16_to_ascii_decimal_unaligned( + record, key->ipv4_key.k.port); + *record++ = SYSLOG_DELIMITER; + + *record++ = SYSLOG_FIELD_ABSENT; /* No translated start port */ + *record++ 
= SYSLOG_DELIMITER; + + *record++ = SYSLOG_FIELD_ABSENT; /* No translated end port */ + *record++ = SYSLOG_DELIMITER; + + /*No Destination Info*/ + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + + *record++ = ']'; /* End of the reocrd */ + + log_info->pkt_length += record - temp; +} + +void cnat_syslog_ds_lite_port_limit_exceeded( + dslite_key_t * key, + dslite_table_entry_t *dslite_entry) +{ + cnat_syslog_logging_info_t *logging_info = 0; + + SYSLOG_PRECHECK(dslite_entry, DSLite) + + cnat_syslog_dslite_insert_port_exceeded(logging_info, key); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + + if (PREDICT_FALSE(logging_info->pkt_length > + logging_info->max_length_minus_max_record_size)) { + cnat_syslog_send_pkt(logging_info); + } +} + +void cnat_syslog_nat44_insert_port_exceeded( + cnat_syslog_logging_info_t *log_info, + cnat_key_t * key) +{ + /* This record should like this - + * [Portblockrunout <L4> <Original Source IP> <Inside VRF Name> + * - - <Original Port> - - - -] + */ + unsigned char *temp, *record; + + temp = record = &(log_info->current_logging_context->packet_data[ + CNAT_NFV9_IP_HDR_OFFSET + log_info->pkt_length]); + + /* Now we point to the location where record needs to be inserted */ + *record++ = '['; /* Open the record */ + + /* Copy the record type */ + memcpy(record, sys_log_event[port_block_runout].event_name, + sys_log_event[port_block_runout].name_length); + record += sys_log_event[port_block_runout].name_length; + *record++ = SYSLOG_DELIMITER; + + u16 my_proto_mask; + my_proto_mask = key->k.vrf & CNAT_PRO_MASK; + if(PREDICT_TRUE(my_proto_mask == CNAT_TCP)) { + *record++ = '6'; + } else if(PREDICT_TRUE(my_proto_mask == CNAT_UDP)) { + *record++ = '1'; + *record++ = '7'; + } else { + *record++ = '1'; + } + *record++ = SYSLOG_DELIMITER; + + /* Copy the Original Source IP */ + record += copy_ipv4_addr(record, key->k.ipv4); + *record++ = SYSLOG_DELIMITER; + + /* copy configured VRF NAME */ + memcpy(record, log_info->vrf_name, log_info->vrf_name_len); + record += log_info->vrf_name_len; + *record++ = SYSLOG_DELIMITER; + + /* No IPV6 source address for nat44 */ + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + + *record++ = '-'; /* No translated source ip */ + *record++ = SYSLOG_DELIMITER; + + record += u16_to_ascii_decimal_unaligned( + record, key->k.port); + *record++ = SYSLOG_DELIMITER; + + *record++ = '-'; /* No translated start port */ + *record++ = SYSLOG_DELIMITER; + + *record++ = '-'; /* No translated end port */ + *record++ = SYSLOG_DELIMITER; + + /*No Destination Info*/ + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + + *record++ = ']'; /* End of the reocrd */ + + log_info->pkt_length += record - temp; +} + +void cnat_syslog_nat44_port_limit_exceeded( + cnat_key_t * key, + cnat_vrfmap_t *vrfmap) +{ + cnat_syslog_logging_info_t *logging_info = 0; + + SYSLOG_PRECHECK(vrfmap, NAT44) + + cnat_syslog_nat44_insert_port_exceeded(logging_info, key); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. 
+ */ + + if (PREDICT_FALSE(logging_info->pkt_length > + logging_info->max_length_minus_max_record_size)) { + cnat_syslog_send_pkt(logging_info); + } +} + +void cnat_syslog_nat44_insert_tcp_seq_mismatch( + cnat_syslog_logging_info_t *log_info, + cnat_main_db_entry_t *db) +{ + /* This record should like this - + * [TCPseqmismatch <L4> <Original Source IP> <Inside VRF Name> + * - <Translated Source IP> <Original Port> <Translated Source Port> - - -] + */ + unsigned char *temp, *record; + + temp = record = &(log_info->current_logging_context->packet_data[ + CNAT_NFV9_IP_HDR_OFFSET + log_info->pkt_length]); + + /* Now we point to the location where record needs to be inserted */ + *record++ = '['; /* Open the record */ + + /* Copy the record type */ + memcpy(record, sys_log_event[tcp_seq_mismatch].event_name, + sys_log_event[tcp_seq_mismatch].name_length); + record += sys_log_event[tcp_seq_mismatch].name_length; + *record++ = SYSLOG_DELIMITER; + + /* Next field is TCP */ + *record++ = '6'; + *record++ = SYSLOG_DELIMITER; + + /* Copy the Original Source IP */ + record += copy_ipv4_addr(record, db->in2out_key.k.ipv4); + *record++ = SYSLOG_DELIMITER; + + /* copy configured VRF NAME */ + memcpy(record, log_info->vrf_name, log_info->vrf_name_len); + record += log_info->vrf_name_len; + *record++ = SYSLOG_DELIMITER; + + /* No IPV6 source address for nat44 */ + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + + record += copy_ipv4_addr(record, db->out2in_key.k.ipv4); + *record++ = SYSLOG_DELIMITER; + + record += u16_to_ascii_decimal_unaligned( + record, db->in2out_key.k.port); + *record++ = SYSLOG_DELIMITER; + + record += u16_to_ascii_decimal_unaligned( + record, db->out2in_key.k.port); + *record++ = SYSLOG_DELIMITER; + + *record++ = '-'; /* No translated end port */ + *record++ = SYSLOG_DELIMITER; + + /*No Destination Info*/ + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + *record++ = '-'; + *record++ = SYSLOG_DELIMITER; + + *record++ = ']'; /* End of the reocrd */ + + log_info->pkt_length += record - temp; +} + +void cnat_syslog_nat44_tcp_seq_mismatch( + cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap) +{ + cnat_syslog_logging_info_t *logging_info = 0; + + SYSLOG_PRECHECK(vrfmap, NAT44) + + cnat_syslog_nat44_insert_tcp_seq_mismatch(logging_info, db); + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + + if (PREDICT_FALSE(logging_info->pkt_length > + logging_info->max_length_minus_max_record_size)) { + cnat_syslog_send_pkt(logging_info); + } +} +#endif diff --git a/vnet/vnet/vcgn/cnat_syslog.h b/vnet/vnet/vcgn/cnat_syslog.h new file mode 100644 index 00000000000..931f4b9cd22 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_syslog.h @@ -0,0 +1,190 @@ +/* + *------------------------------------------------------------------ + * cnat_syslog.h + * + * Copyright (c) 2011-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __CNAT_SYSLOG_H__ +#define __CNAT_SYSLOG_H__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include "cnat_db.h" +#include "nat64_db.h" +#include "cnat_log_common.h" +#include "dslite_defs.h" + +#define SYSLOG_CONFIG_DEBUG_PRINTF(level, ...) \ + if (config_debug_level > level) PLATFORM_DEBUG_PRINT(__VA_ARGS__); + + +/* one time call at the beginning */ +void cnat_syslog_logging_init(); + +/* + * unconditional call + * will check logging config inside + */ +void cnat_syslog_log_mapping_create(cnat_main_db_entry_t * db, + cnat_vrfmap_t *vrfmap); + +/* + * unconditional call + * will check logging config inside + */ +void cnat_syslog_log_mapping_delete(cnat_main_db_entry_t * db, + cnat_vrfmap_t *vrfmap); + +void cnat_syslog_ds_lite_mapping_create(cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry, cnat_session_entry_t *sdb +#ifndef NO_BULK_LOGGING + , int bulk_alloc +#endif + ); + +void cnat_syslog_ds_lite_port_limit_exceeded( + dslite_key_t * key, + dslite_table_entry_t *dslite_entry); + +#define SYSLOG_TIMESTAMP_LENGTH 20 + +#define CNAT_SYSLOG_VERSION_NUMBER 1 +#define CNAT_SYSLOG_PRIORITY 16*8+6 +/* facility = local0 + severity = info */ + +#define MAX_SYSLOG_HOSTNAME_LEN 32 + +/* 6 for priority + space + * 2 for version + space + * 21 YYYY MMM DD HH:MM:SS + space + * 33 for hostname + space + * 4 for App Name (-) + space + Proc ID (-) + space + * 7 for Msg ID (DSLite is the longest Msg ID so far + space + * 2 for Structured data (-) + space + */ +#define MAX_SYSLOG_HEADER_LEN 75 + +/* 18 for Event Name (Portblockrunout is the longest as of now) + * 3 for L4 (including space) + * 16 for original souce IP + space + * 33 for inside vrf name + space + * 40 for original source IPV6 + space + * 16 for translated source IP + space + * 6 for original port + space + * 6 for translated first source port + space + * 5 for translated last source port + * 2 for [] enclosure + */ +#define MAX_SYSLOG_RECORD_LEN 145 + +typedef enum { + NAT44, + DSLite +} syslog_service_type_t; + +typedef enum { + userbased_assign, + userbased_withdraw, + sessionbased_assign, + sessionbased_withdraw, + sessionbased_assignD, + sessionbased_withdrawD, + port_block_runout, + tcp_seq_mismatch, + max_syslog_event_type +} syslog_event_type_t; + +/* + * This structure store the Syslog Logging information on per + * collector basis. This structure is allocated from a pool and index + * to this structure is stored VRF MAP structures + */ +typedef struct { + /* + * nat64_id will be 0 for nat44 config and i_vrf_id, i_vrf will be 0 + * for nat64 config. 
Nat64_id will be used while nat64 collector is + * search and i_vrf* for nat44 collector + */ + /* Similarly for ds_lite, ds_lite_id will be used and nat64_id, + * ivrf_id shall be set to 0 + */ + u32 i_vrf_id; /* Inside VRF ID corresponding to this collector */ + u16 i_vrf; /* Inside VRF (uidb_index) corresponding to this collector */ + u16 ds_lite_id; /* DS Lite instance for this collector */ + u16 port; /* Destination port number of the collector */ + + /* + * This field determines the maximum size of the Syslog information + * that can be stored in a logging packet + */ + u16 max_length_minus_max_record_size; + u32 ipv4_address; /* Destination IP address of the collector */ + /* + * Timestamp in UNIX seconds corresponding to when the current + * logging packet was created + */ + u32 current_logging_context_timestamp; + + /* + * Indicates if the entry is already deleted + */ + u8 deleted; + + u8 header_priority; + u16 pkt_length; + + char header_hostname[MAX_SYSLOG_HOSTNAME_LEN]; + char vrf_name[VRF_NAME_LEN_STORED]; + u16 vrf_name_len; + u8 logging_policy; + /* + * current logging context + */ + spp_ctx_t *current_logging_context; + spp_ctx_t *queued_logging_context; + +} cnat_syslog_logging_info_t; + + +/* + * Global structure for CGN APP configuration + */ +typedef struct { + + u16 cnat_syslog_disp_node_index; + + /* + * Whether we have initialized the Syslog information + */ + u8 cnat_syslog_init_done; + +} cnat_syslog_global_info_t; + +typedef struct { + u64 logging_context_creation_fail_count; + u64 downstream_constipation_count; + u64 logging_context_creation_deferred_count; +} cnat_syslog_global_counters_t; + +extern cnat_syslog_logging_info_t *cnat_syslog_logging_info_pool; +extern cnat_syslog_global_info_t cnat_syslog_global_info; + +#define SYSLOG_DEF_PATH_MTU 1500 + +#endif /* __CNAT_SYSLOG_H__ */ diff --git a/vnet/vnet/vcgn/cnat_util.c b/vnet/vnet/vcgn/cnat_util.c new file mode 100644 index 00000000000..2415c5437fd --- /dev/null +++ b/vnet/vnet/vcgn/cnat_util.c @@ -0,0 +1,2257 @@ +/* + *------------------------------------------------------------------ + * cnat_util.c - cnat helpers + * + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/vec.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/clib.h> +#include <vppinfra/error.h> + +#include "tcp_header_definitions.h" + +#if 0 +void spp_api_cnat_v4_config_dummy_t_handler +(spp_api_cnat_v4_config_dummy_t *mp); + +void spp_api_cnat_v4_config_dummy_max_t_handler +(spp_api_cnat_v4_config_dummy_max_t *mp); + +void spp_api_cnat_v4_config_icmp_timeout_t_handler +(spp_api_cnat_v4_config_icmp_timeout_t *mp); + +void spp_api_cnat_clear_db_request_t_handler +(spp_api_cnat_clear_db_request_t *mp); + +void spp_api_cnat_v4_debug_global_t_handler +(spp_api_cnat_v4_debug_global_t *mp); + +void spp_api_cnat_v4_show_outside_entry_req_t_handler +(spp_api_cnat_v4_show_outside_entry_req_t *mp); + +void spp_api_cnat_v4_show_inside_entry_req_t_handler +(spp_api_cnat_v4_show_inside_entry_req_t *mp); + +void spp_api_cnat_show_statistics_summary_req_t_handler +(spp_api_cnat_show_statistics_summary_req_t *mp); + +void cnat_db_create_db_entries_cmd (int argc, unsigned long *argv) +{ + int i, j ; + int nusers = 3000; + + cnat_db_key_bucket_t key_info; + cnat_main_db_entry_t *db; + cnat_gen_icmp_info info; + cnat_key_t dest_info_dummy; + + memset(&dest_info_dummy, 0, sizeof(cnat_key_t)); + printf ("Create %d users, 100 translations each...\n", nusers); + + for (i = 0; i < nusers; i++) { + for (j = 0; j < 100; j++) { + + key_info.k.k.vrf = CNAT_TCP | (1 & CNAT_VRF_MASK); + key_info.k.k.ipv4 = 0x0c000001+i; + key_info.k.k.port = 1024+j; + + db = cnat_get_main_db_entry_v2(&key_info, PORT_SINGLE, + PORT_TYPE_DYNAMIC, &info, &dest_info_dummy); + + if (db == 0) { + printf ("OOPS: cnat_main_db_create failed users %d trans %d\n", i, j); + return; /*jli*/ + } + + db->entry_expires = cnat_current_time; + + } + } +} + +void db_test_clear (int argc, unsigned long *argv) +{ + spp_api_cnat_clear_db_request_t mp; + + mp.wildcard = argv[0]; + mp.protocol = argv[1]; + mp.port_num = argv[2]; + mp.inside_vrf = argv[3]; + mp.ip_addr = argv[4]; + spp_api_cnat_clear_db_request_t_handler(&mp); +} + +/* test code*/ +void cnat_db_test_show (int argc, unsigned long *argv) +{ + + spp_api_cnat_v4_show_inside_entry_req_t mp1[2000]; + spp_api_cnat_v4_show_outside_entry_req_t mp2[30000]; + + if (argc > 1) { + if (argc != 7) { + printf("Usage: db test show dec((which)) dec((vrf)) dec((proto)) dec((ip)) dec((start_port)) dec((end_port)) dec((flags))\n"); + return; + } + + mp1[0].vrf_id = argv[1]; + mp1[0].protocol = argv[2];; + mp1[0].ipv4_addr = argv[3]; + mp1[0].start_port = argv[4]; + mp1[0].end_port = argv[5]; + mp1[0].flags = argv[6]; + mp1[0].all_entries = 0; + + if (argv[0] == 1) { + spp_api_cnat_v4_show_inside_entry_req_t_handler (&(mp1[0])); + } else { + spp_api_cnat_v4_show_outside_entry_req_t_handler (&(mp2[0])); + } + + return; + } else { + printf("inside entries \n"); + mp1[0].ipv4_addr = 0x016994CA; + mp1[0].vrf_id = 1; + mp1[0].all_entries = 0; + mp1[0].start_port = 32765; + mp1[0].end_port = 65535; + mp1[0].protocol = 2; + mp1[0].flags = 3; + + spp_api_cnat_v4_show_inside_entry_req_t_handler (&(mp1[0])); + + mp2[0].ipv4_addr = 0x640200c1; + mp2[0].vrf_id = 0; + mp2[0].start_port = 1025; + mp2[0].end_port = 62235; + mp2[0].protocol = 2; + mp2[0].flags = 3; + + spp_api_cnat_v4_show_outside_entry_req_t_handler (&(mp2[0])); + } + +#if 1 + { + spp_api_cnat_stats_node_mapping_t mp3[20000]; + spp_api_cnat_stats_counter_mapping_t 
mp4[20000]; + spp_api_cnat_stats_counter_values_t mp5[23000]; + + mp3[0].rc = 0; + spp_api_cnat_stats_node_mapping_t_handler (&mp3); + mp4[0].rc = 0; + spp_api_cnat_stats_counter_mapping_t_handler (&mp4); + + mp5[0].flag = 1; + spp_api_cnat_stats_counter_values_t_handler(&mp5); + } +#endif + +#if 0 + mp1.ipv4_addr = 0x0A010102; + mp1.vrf_id = 1; + mp1.all_entries = 1; + mp1.protocol = 1; + + spp_api_cnat_v4_show_inside_entry_req_t_handler (&mp1); + + + mp1.ipv4_addr = 0x0A010103; + mp1.vrf_id = 1; + mp1.all_entries = 1; + mp1.protocol = 2; + + spp_api_cnat_v4_show_inside_entry_req_t_handler (&mp1); + + mp6[0].inside_vrf_id = 1; + mp6[0].start_ipv4_address = 0x64020001; + mp6[0].end_ipv4_address = 0x64020101; + mp6[0].free_addr = 0; + mp6[0].flags = CNAT_TRANSLATION_ENTRY_STATIC; + + spp_api_cnat_v4_show_freeUsed_entry_req_t_handler(&mp6); + +#endif + printf("returned here"); + + return; +} + + + +void cnat_db_clear_all_entries (int argc, unsigned long *argv) +{ + cnat_main_db_entry_t * db; + u32 index; + + pool_header_t * p = vec_header(cnat_main_db, sizeof(pool_header_t) ); + + for(index = 0; index < vec_len(cnat_main_db); index++) { + + if ( !clib_bitmap_get(p->free_bitmap, index)) { + + db = cnat_main_db + index; + cnat_delete_main_db_entry_v2(db); + + } + } + +} + + +void spp_log_cmd (int argc, unsigned long *argv) +{ + u16 num_traces; + u16 error_code; + u16 num_args; + u32 arg[7]; + u8 i; + + num_traces = argv[0]; + + for (i = 0; i < num_traces; i++) { + error_code = argv[1 + 4*i]; + num_args = argv[2 + 4*i]; + arg[0] = argv[3 + 4*i]; + arg[1] = argv[4 + 4*i]; + + spp_printf(error_code, num_args, arg); + } +} + + +void cnat_db_create_random_entries (int argc, unsigned long *argv) +{ + + platform_cnat_db_create_random_entries(); +} + +#define MAX_DEPTH 10 + +void show_user_db_hash_chain_len() { + + u32 max_len, len, n, i, max_idx, index, used; + cnat_user_db_entry_t * udb; + u32 hash_depth[MAX_DEPTH]; + + memset(hash_depth, 0, sizeof(u32)*MAX_DEPTH); + + n = vec_len(cnat_user_hash); + + used = 0; + max_len = 0; + for(i=0;i<n;i++) { + + index = cnat_user_hash[i].next; + + len = 0; + if (index != EMPTY) used++; + + while (index != EMPTY) { + len++ ; + udb = cnat_user_db + index; + index = udb->user_hash.next; + } + + if(len < (MAX_DEPTH-1) ) { + hash_depth[len]++; + } else { + hash_depth[MAX_DEPTH-1]++; + } + + if (max_len < len) { + max_len = len; + max_idx = cnat_user_hash[i].next; + } + } + + printf("Max user db hash length %u, total buckets %u used %u\n", + max_len, n, used); + + for( i=1; i<(MAX_DEPTH - 1); i++) { + printf("Hash chain len %02d, entries count %d\n", i, hash_depth[i]); + } + + printf("Hash chain len >%02d, entries count %d\n", + MAX_DEPTH-1, hash_depth[MAX_DEPTH-1]); + +} + +void show_main_db_hash_chain_len() { + + u32 max_len, len, n, i, max_idx, index, used; + cnat_main_db_entry_t * db; + u32 hash_depth[MAX_DEPTH]; + + memset(hash_depth, 0, sizeof(u32)*MAX_DEPTH); + + n = vec_len(cnat_in2out_hash); + + used = 0; + max_len = 0; + for(i=0;i<n;i++) { + + index = cnat_in2out_hash[i].next; + + len = 0; + if (index != EMPTY) used++; + + while (index != EMPTY) { + len++ ; + db = cnat_main_db + index; + index = db->in2out_hash.next; + } + + if(len < (MAX_DEPTH-1) ) { + hash_depth[len]++; + } else { + hash_depth[MAX_DEPTH-1]++; + } + + if (max_len < len) { + max_len = len; + max_idx = cnat_in2out_hash[i].next; + } + } + + printf("Max main db I2O hash length %u, total buckets %u used %u\n", + max_len, n, used); + + for( i=1; i<(MAX_DEPTH - 1); i++) { + printf("Hash chain len 
%02d, entries count %d\n", i, hash_depth[i]); + } + + printf("Hash chain len >%02d, entries count %d\n", + MAX_DEPTH-1, hash_depth[MAX_DEPTH-1]); + + + memset(hash_depth, 0, sizeof(u32)*MAX_DEPTH); + + n = vec_len(cnat_out2in_hash); + used = 0; + max_len = 0; + + for(i=0;i<n;i++) { + + index = cnat_out2in_hash[i].next; + len = 0; + + if (index != EMPTY) used++; + + while (index != EMPTY) { + len++ ; + db = cnat_main_db + index; + index = db->out2in_hash.next; + } + + if(len < (MAX_DEPTH-1) ) { + hash_depth[len]++; + } else { + hash_depth[MAX_DEPTH-1]++; + } + + if (max_len < len) { + max_len = len; + max_idx = cnat_in2out_hash[i].next; + } + } + + printf("Max main db O2I hash length %u, total buckets %u used %u\n", + max_len, n, used); + + for( i=1; i<(MAX_DEPTH - 1); i++) { + printf("Hash chain len %02d, entries count %d\n", i, hash_depth[i]); + } + + printf("Hash chain len >%02d, entries count %d\n", + MAX_DEPTH-1, hash_depth[MAX_DEPTH-1]); + + +} + +u32 db_free_entry (void * p) { + + pool_header_t * h; + u32 free; + + h = pool_header(p); + + free = p == 0 ? 0: vec_len(h->free_indices); + + return free; +} + +void cnat_db_summary (int argc, unsigned long *argv) { + + PLATFORM_DEBUG_PRINT("\n-----------------------------------------"); + PLATFORM_DEBUG_PRINT("\nSummary DB"); + PLATFORM_DEBUG_PRINT("\n-----------------------------------------\n"); + u32 count1, count2, i; +#ifndef NO_NAT64_DEF + extern void nat64_session_db_summary(); +#endif + /* main db active entry count*/ + count1 = vec_len(cnat_main_db); + count2 = db_free_entry(cnat_main_db); + + PLATFORM_DEBUG_PRINT("main db entries: total %u, active %u, free %u\n", count1, count1 - count2, count2); + + /* user db active entry count */ + count1 = vec_len(cnat_user_db); + count2 = db_free_entry(cnat_user_db); + + PLATFORM_DEBUG_PRINT("user db entries: total %u, active %u, free %u\n", count1, count1 - count2, count2); + + + /* user db active entry count */ +#ifndef NO_NAT64_DEF + nat64_session_db_summary(); +#endif + + /* main db hash i2o o2i usage count */ + count1 = 0; + count2 = 0; + + for (i=0; i< CNAT_MAIN_HASH_SIZE; i++) { + + if(cnat_in2out_hash[i].next != ~0) count1++; + if(cnat_out2in_hash[i].next != ~0) count2++; + + } + + PLATFORM_DEBUG_PRINT("main hash in2out: total %6u, used %u (%.2f%%)\n", + CNAT_MAIN_HASH_SIZE, count1, + (100.0*count1)/CNAT_MAIN_HASH_SIZE); + + PLATFORM_DEBUG_PRINT("main hash out2in: total %6u, used %u (%.2f%%)\n", + CNAT_MAIN_HASH_SIZE, count2, + (100.0 * count1)/CNAT_MAIN_HASH_SIZE); + + /* use db hashing usage count */ + + count1 = 0; + + for (i=0; i< CNAT_USER_HASH_SIZE; i++) { + if(cnat_user_hash[i].next != ~0) count1++; + } + + PLATFORM_DEBUG_PRINT("user db hash: total %6u, used %u (%.2f%%)\n", + CNAT_USER_HASH_SIZE, count1, + (100.0*count1)/CNAT_USER_HASH_SIZE); + + PLATFORM_DEBUG_PRINT("\nNull pointer exceptions:\n"); + PLATFORM_DEBUG_PRINT("packet_pool: null enq : %10u, null deq : %10u\n",null_enq_pkt, null_deq_pkt); + PLATFORM_DEBUG_PRINT("ctx_pool : null enq : %10u, null deq : %10u\n",null_enq_ctx, null_deq_ctx); + PLATFORM_DEBUG_PRINT("wqe_pool : null enq : %10u, null deq : %10u\n",null_enq_wqe, null_deq_wqe); + + PLATFORM_DEBUG_PRINT("\nReceived Packet Errors on SPI:\n"); + PLATFORM_DEBUG_PRINT("rcv_pkt_errs: %10u\n",rcv_pkt_errs); + + PLATFORM_DEBUG_PRINT("\nctx/sf allocation failure errors: \n"); +#ifndef CGN_PERF_SCALE_DEBUG + PLATFORM_DEBUG_PRINT("Warning: collection of error counts <with timestamp> is disabled.\n"); + PLATFORM_DEBUG_PRINT("sf alloc errors: %10u, ctx alloc errors: 
%10u\n",sf_alloc_errs,ctx_alloc_errs); +#else + for(i=0;i<COUNTER_BUFFER_SIZE;i++) + PLATFORM_DEBUG_PRINT("<%2d>Timestamp <sec>: %10u, sf errors: %10u, ctx errors: %10u\n",\ + i,err_cnt_arr[i].timestamp,\ + err_cnt_arr[i].sf_error_counter, \ + err_cnt_arr[i].ctx_error_counter); +#endif +} + +void cnat_db_hash_summary (int argc, unsigned long *argv) { + + show_main_db_hash_chain_len(); + + show_user_db_hash_chain_len(); +} + +/* + * cnat_port_alloc + * This function is now deprecated... + * + */ +#ifdef LB_PORT +int cnat_port_alloc (cnat_portmap_t *cnat_portmap, u16 *portmap_inuse, + int cnat_instance, + port_alloc_t atype, port_pair_t ptype, + int *index, u32 *ipv4_address, u16 *base_port) +#else +int cnat_port_alloc (cnat_portmap_t *cnat_portmap, u16 *portmap_inuse, + port_alloc_t atype, port_pair_t ptype, + int *index, u32 *ipv4_address, u16 *base_port) +#endif +{ + + return (0); +} + +/* + * cnat_port_free + * This function is now deprecated... + * + */ +#ifdef LB_PORT +void cnat_port_free (cnat_portmap_t *cnat_portmap, u16 *portmap_inuse, + int instance, int index, port_pair_t ptype, u16 base_port) +#else +void cnat_port_free (cnat_portmap_t *cnat_portmap, u16 *portmap_inuse, + int index, port_pair_t ptype, u16 base_port) +#endif +{ +} + +void spp_api_cnat_port_allocate_t_handler(spp_api_cnat_port_allocate_t *mp) +{ + int i, j, k1, k2; + int pm_index; + u32 ipv4_address; + u16 aport; + int rv; + char *out1, *out2, *out_f; + port_alloc_t pt1, pt2; + cnat_portmap_t *pm = 0; + u16 *pm_inuse = 0; + u32 *firstp =0; + u32 nr_ports =0; + u32 nodd_ports = 0; + u32 neven_ports = 0; +#ifdef LB_PORT + u32 my_instance = 1; +#endif + char out_r[12] = "allocated-r"; + char out_o[12] = "allocated-o"; + char out_e[12] = "allocated-e"; + + + /* + * this command is run after db create portmap + * vrf is hardcode to 1 + */ + + /* Already have a portmap vector for this VRF? */ + for (i = 0; i < vec_len(cnat_portmap_indices_by_vrf); i++) { + if (cnat_portmap_indices_by_vrf[i] == mp->vrf) { + pm = cnat_portmaps[i]; + pm_inuse = cnat_portmaps_inuse[i]; + goto found_portmaps; + } + } + + printf("need to run db create portmaps first 0x%d\n", + vec_len(cnat_portmap_indices_by_vrf)); + return; + +found_portmaps: + nr_ports = mp->nr_ports; + nodd_ports = mp->nodd_ports; + neven_ports = mp->neven_ports; + + if ((nr_ports + nodd_ports + neven_ports ) > (PORTS_PER_ADDR)) { + printf("invalid port# nr_ports %d + odd %d + even %d " + "should be less than 200 \n", nr_ports, nodd_ports, neven_ports); + return; + } + + /* + * first port + */ + firstp = nr_ports ? (&nr_ports) : (nodd_ports ? (&nodd_ports) : (&neven_ports)); + if (!(*firstp)) { + printf("invalid port# nr_ports %d odd %d even %d ", + nr_ports, nodd_ports, neven_ports); + } + out_f = nr_ports ? out_r : (nodd_ports ? 
out_o : out_e); + +#ifdef LB_PORT + rv = cnat_port_alloc (pm, pm_inuse, my_instance, + PORT_ALLOC_ANY, PORT_S_ODD, + &pm_index, &ipv4_address, &aport); +#else + rv = cnat_port_alloc (pm, pm_inuse, + PORT_ALLOC_ANY, PORT_S_ODD, + &pm_index, &ipv4_address, &aport); +#endif + + if (!rv) { + printf("failed-o\n"); + return; + } + printf("%s %8d %10x %8d\n", out_f, + pm_index, ipv4_address, aport); + + (*firstp)--; + + for (i=0; i < nr_ports; i++) { +#ifdef LB_PORT + rv = cnat_port_alloc (pm, pm_inuse, my_instance, + PORT_ALLOC_DIRECTED, PORT_SINGLE, + &pm_index, &ipv4_address, &aport); +#else + rv = cnat_port_alloc (pm, pm_inuse, + PORT_ALLOC_DIRECTED, PORT_SINGLE, + &pm_index, &ipv4_address, &aport); +#endif + if (rv) { + printf("%s %8d %10x %8d\n", out_r, + pm_index, ipv4_address, aport); + } else { + printf("%s failed\n", out_r); + return; + } + } + + if (nodd_ports > neven_ports) { + k1 = nodd_ports; + k2 = neven_ports; + pt1 = PORT_S_ODD; + pt2 = PORT_S_EVEN; + out1 = out_o; + out2 = out_e; + } else { + k1= neven_ports; + pt1 = PORT_S_EVEN; + k2 = nodd_ports; + pt2 = PORT_S_ODD; + out1 = out_e; + out2 = out_o; + } + + j = 0; + for (i=0; i < k1; i++) { +#ifdef LB_PORT + rv = cnat_port_alloc (pm, pm_inuse, my_instance, + PORT_ALLOC_DIRECTED, pt1, + &pm_index, &ipv4_address, &aport); +#else + rv = cnat_port_alloc (pm, pm_inuse, + PORT_ALLOC_DIRECTED, pt1, + &pm_index, &ipv4_address, &aport); +#endif + if (rv) { + printf("%s %8d %10x %8d\n", out1, + pm_index, ipv4_address, aport); + } else { + printf("%s failed\n", out1); + return; + } + + if (j < k2) { +#ifdef LB_PORT + rv = cnat_port_alloc (pm, pm_inuse, my_instance, + PORT_ALLOC_DIRECTED, pt2, + &pm_index, &ipv4_address, &aport); +#else + rv = cnat_port_alloc (pm, pm_inuse, + PORT_ALLOC_DIRECTED, pt2, + &pm_index, &ipv4_address, &aport); +#endif + + if (rv) { + printf("%s %8d %10x %8d\n", out2, + pm_index, ipv4_address, aport); + j++; + } else { + printf("%s failed\n", __FUNCTION__); + return; + } + } + } +} + +void cnat_db_summary_stats (int argc, unsigned long *argv) +{ + spp_api_cnat_show_statistics_summary_req_t mp[50000]; + + spp_api_cnat_show_statistics_summary_req_t_handler(&(mp[0])); +} + +void cnat_debug_global_test (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_debug_global_t *mp; + spp_api_cnat_v4_config_dummy_t mp1; + spp_api_cnat_v4_config_icmp_timeout_t mp2[10]; + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_V4_DEBUG_GLOBAL; + mp->debug_flag = argv[0]; + + platform_send_msg(mp); + + mp2[0].default_value = 3; + + spp_api_cnat_v4_config_dummy_t_handler(&mp1); + spp_api_cnat_v4_config_icmp_timeout_t_handler(&(mp2[0])); +} + +void cnat_debug_inside_test (int argc, unsigned long *argv) +{ + + spp_api_cnat_v4_debug_in2out_private_addr_t *mp; + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_V4_DEBUG_IN2OUT_PRIVATE_ADDR; + + mp->start_addr = spp_host_to_net_byte_order_32(argv[0]); + mp->end_addr = spp_host_to_net_byte_order_32(argv[1]); + mp->i_vrf = spp_host_to_net_byte_order_16(argv[2]); + mp->debug_flag = spp_host_to_net_byte_order_32(argv[3]); + + platform_send_msg(mp); +} + +void cnat_config_ports_user (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_config_port_limit_t *mp; + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_V4_CONFIG_PORT_LIMIT; + + mp->port_limit = spp_host_to_net_byte_order_16(argv[0]); + + platform_send_msg(mp); + +} 
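The CLI and debug helpers in cnat_util.c above and below this point all drive the vCGN data plane through the same SPP message pattern: allocate a typed message from the SPP API pool, zero it, fill the fields in network byte order, and hand it to platform_send_msg(). A minimal sketch of that shared pattern follows; it reuses only names already visible in this file (spp_api_cnat_v4_config_port_limit_t, spp_msg_api_alloc, spp_host_to_net_byte_order_16, platform_send_msg), and the wrapper function name itself is hypothetical, for illustration only.

    /*
     * Sketch of the allocate / fill-in-network-order / send pattern used by
     * the config helpers in this file. The wrapper name is illustrative;
     * the message type and helpers are the ones used by the handlers above.
     */
    static void cnat_send_port_limit_sketch (u16 port_limit)
    {
        spp_api_cnat_v4_config_port_limit_t *mp;

        mp = spp_msg_api_alloc (sizeof (*mp));   /* message buffer from the SPP pool   */
        memset(mp, 0, sizeof (*mp));             /* clear all fields, as handlers above do */

        mp->_spp_msg_id = SPP_API_CNAT_V4_CONFIG_PORT_LIMIT;
        mp->port_limit  = spp_host_to_net_byte_order_16(port_limit); /* wire byte order */

        platform_send_msg(mp);                   /* caller does not free mp afterwards,
                                                    matching the handlers in this file  */
    }
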
+ +void cnat_debug_outside_test (int argc, unsigned long *argv) +{ + + spp_api_cnat_v4_debug_out2in_public_addr_t *mp; + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_V4_DEBUG_OUT2IN_PUBLIC_ADDR; + + mp->start_addr = spp_host_to_net_byte_order_32(argv[0]); + mp->end_addr = spp_host_to_net_byte_order_32(argv[1]); + mp->o_vrf = spp_host_to_net_byte_order_16(argv[2]); + mp->debug_flag = spp_host_to_net_byte_order_32(argv[3]); + + platform_send_msg(mp); +} + +void cnat_debug_udp_dump (int argc, unsigned long *argv) +{ + + spp_api_cnat_p2mp_debug_request_t *mp; + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_P2MP_DEBUG_REQUEST; + mp->dump_type = + spp_host_to_net_byte_order_16(CNAT_DEBUG_GENERIC_COMMAND_DEBUG_FLAGS); + + if (spp_host_to_net_byte_order_32(argv[0]) == 1) { + mp->param[0] = spp_host_to_net_byte_order_32( + CNAT_DEBUG_FLAG_UDP_INSIDE_PACKET_DUMP); + } else { + mp->param[0] = spp_host_to_net_byte_order_32( + CNAT_DEBUG_FLAG_UDP_OUTSIDE_PACKET_DUMP); + } + mp->param[1] = spp_host_to_net_byte_order_32(argv[1]); + + platform_send_msg(mp); + + + +} + +void cnat_debug_udp_crc (int argc, unsigned long *argv) +{ + spp_api_cnat_p2mp_debug_request_t *mp; + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_P2MP_DEBUG_REQUEST; + mp->dump_type = + spp_host_to_net_byte_order_16(CNAT_DEBUG_GENERIC_COMMAND_DEBUG_FLAGS); + + if (spp_host_to_net_byte_order_32(argv[0]) == 1) { + mp->param[0] = spp_host_to_net_byte_order_32( + CNAT_DEBUG_FLAG_UDP_INSIDE_CHECKSUM_MODIFY); + } else { + mp->param[0] = spp_host_to_net_byte_order_32( + CNAT_DEBUG_FLAG_UDP_OUTSIDE_CHECKSUM_MODIFY); + } + mp->param[1] = spp_host_to_net_byte_order_32(argv[1]); + + platform_send_msg(mp); + +} + +void cnat_db_allocate_port_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_port_allocate_t *mp; + + + if (!argc) { + printf("no port# defined\n"); + return; + } + + if ( argc < 3) { + printf("no port# defined\n"); + return; + } + + if ((argc == 3) && (argv[0] == 0) && (argv[1] == 0) && (argv[2] == 0)) { + printf("no port# defined\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_PORT_ALLOCATE; + mp->nr_ports = argv[0]; + mp->nodd_ports = argv[1]; + mp->neven_ports = argv[2]; + mp->vrf = 1; + + platform_send_msg(mp); +} + + +void spp_api_cnat_port_clear_t_handler(spp_api_cnat_port_clear_t *mp) +{ + u32 i; + cnat_portmap_t *pm = 0; + u16 *pm_inuse = 0; +#ifdef LB_PORT + u32 my_instance = 1; +#endif + + + /* + * this command is run after db create port + * vrf is hardcode to 1 + */ + + /* Already have a portmap vector for this VRF? 
*/ + for (i = 0; i < vec_len(cnat_portmap_indices_by_vrf); i++) { + if (cnat_portmap_indices_by_vrf[i] == mp->vrf) { + pm = cnat_portmaps[i]; + pm_inuse = cnat_portmaps_inuse[i]; + goto found_portmaps; + } + } + + printf("portmap is not created 0x%d\n", + vec_len(cnat_portmap_indices_by_vrf)); + return; + +found_portmaps: + if (mp->pm_index >= vec_len(pm)) { + printf("invalid port_index 0x%d >= 0x%d\n", + mp->pm_index, vec_len(pm)); + return; + } + +#ifdef LB_PORT + cnat_port_free(pm, pm_inuse, my_instance, + mp->pm_index, PORT_SINGLE, mp->port); +#else + cnat_port_free(pm, pm_inuse, + mp->pm_index, PORT_SINGLE, mp->port); +#endif + printf("\n pm_index %d port %d is deleted\n", mp->pm_index, mp->port); +} + + + +void cnat_db_clear_port_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_port_clear_t *mp; + + if (!argc) { + printf("no port# defined\n"); + return; + } + + if ( argc < 2 ) { + printf("no port# defined\n"); + return; + } + + if (argc > 2) { + printf("too many port# defined\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_PORT_CLEAR; + mp->pm_index = argv[0]; + mp->port = argv[1]; + mp->vrf = 1; + + platform_send_msg(mp); +} + + +void spp_api_cnat_v4_add_vrf_map_t_handler +(spp_api_cnat_v4_add_vrf_map_t *mp); + +void spp_api_cnat_v4_del_vrf_map_t_handler +(spp_api_cnat_v4_del_vrf_map_t *mp); + +void spp_api_cnat_v4_add_static_port_t_handler +(spp_api_cnat_v4_add_static_port_t *mp); + +void spp_api_cnat_v4_del_static_port_t_handler +(spp_api_cnat_v4_del_static_port_t *mp); + + +void cnat_db_create_vrfmap_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_add_vrf_map_t *mp; + + if ((argc != 4)) { + printf("need right input\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + mp->_spp_msg_id = SPP_API_CNAT_V4_ADD_VRF_MAP; + mp->i_vrf = spp_host_to_net_byte_order_16(argv[0]); + mp->o_vrf = spp_host_to_net_byte_order_16(argv[1]); + mp->start_addr[0] = spp_host_to_net_byte_order_32(argv[2]); + mp->end_addr[0] = spp_host_to_net_byte_order_32(argv[3]); + + /* + * Some hardcoded values for the vrf ids + */ + mp->i_vrf_id = spp_host_to_net_byte_order_32(0x00000100 | mp->i_vrf); + mp->o_vrf_id = spp_host_to_net_byte_order_32(0x00000200 | mp->o_vrf); + + platform_send_msg(mp); +} + + +void cnat_db_delete_vrfmap_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_del_vrf_map_t *mp; + + if (argc != 4) { + printf("need right input\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + mp->_spp_msg_id = SPP_API_CNAT_V4_DEL_VRF_MAP; + mp->i_vrf = spp_host_to_net_byte_order_16(argv[0]); + mp->start_addr[0] = spp_host_to_net_byte_order_32(argv[2]); + mp->end_addr[0] = spp_host_to_net_byte_order_32(argv[3]); + + platform_send_msg(mp); +} + +void cnat_db_add_svi_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_config_svi_params_t *mp; + + if (argc != 3) { + printf("need right input\n"); + return; + } + + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + mp->_spp_msg_id = SPP_API_CNAT_CONFIG_SVI_PARAMS; + mp->uidb_index = spp_host_to_net_byte_order_16(argv[1]); + mp->svi_ipv4_addr = spp_host_to_net_byte_order_32(argv[2]); + platform_send_msg(mp); + return; +} + + + +void spp_api_cnat_port_create_t_handler(spp_api_cnat_port_create_t *mp) +{ + int i, j, k1, k2; + int my_index; + u32 ipv4_address; + u16 aport; + u32 pm_len =0; + cnat_errno_t rv; + u16 i_vrf; + char *out1, *out2, *out_f; + port_alloc_t 
pt1, pt2; + cnat_vrfmap_t *my_vrfmap; + cnat_portmap_v2_t *pm = 0; + u32 *firstp =0; + u32 nr_ports =0; + u32 nodd_ports = 0; + u32 neven_ports = 0; +#ifdef LB_PORT + u32 my_instance = 1; +#endif + char out_r[12] = "allocated-r"; + char out_o[12] = "allocated-o"; + char out_e[12] = "allocated-e"; +#ifndef NO_BULK_LOGGING + int nfv9_log_req; +#endif + + nr_ports = mp->nr_ports; + nodd_ports = mp->nodd_ports; + neven_ports = mp->neven_ports; + i_vrf = mp->vrf; + + /* + * this command is run after db create vrfmap + * or using vrf id in init function + */ + /* Already have a portmap vector for this VRF? */ + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + if ((my_vrfmap->status == S_RUN) && + (my_vrfmap->i_vrf == i_vrf)) { + pm = my_vrfmap->portmap_list; + pm_len = vec_len(pm); + if (pm_len) { + goto found_portmaps; + } + } + })); + + printf("need to run db create vrfmaps first for this vrf0x%d\n", pm_len); + return; + +found_portmaps: + + if ((nr_ports + nodd_ports + neven_ports ) > (PORTS_PER_ADDR)) { + printf("invalid port# nr_ports %d + odd %d + even %d " + "should be less than 200 \n", nr_ports, nodd_ports, neven_ports); + return; + } + + /* + * first port + */ + firstp = nr_ports ? (&nr_ports) : (nodd_ports ? (&nodd_ports) : (&neven_ports)); + if (!(*firstp)) { + printf("invalid port# nr_ports %d odd %d even %d ", + nr_ports, nodd_ports, neven_ports); + } + out_f = nr_ports ? out_r : (nodd_ports ? out_o : out_e); + + rv = cnat_dynamic_port_alloc_v2 (pm, PORT_ALLOC_ANY, PORT_S_ODD, + &my_index, &ipv4_address, &aport, + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , 0, + &(my_vrfmap->rseed_ip) + ); + + if (rv != CNAT_SUCCESS) { + printf("failed-o\n"); + return; + } + printf("%s %8d %10x %8d\n", out_f, + my_index, ipv4_address, aport); + + (*firstp)--; + + for (i=0; i < nr_ports; i++) { + rv = cnat_dynamic_port_alloc_v2 (pm, PORT_ALLOC_DIRECTED, PORT_SINGLE, + &my_index, &ipv4_address, &aport, + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , 0, + &(my_vrfmap->rseed_ip) + ); + + if (rv == CNAT_SUCCESS) { + printf("%s %8d %10x %8d\n", out_r, + my_index, ipv4_address, aport); + } else { + printf("%s failed\n", __FUNCTION__); + return; + } + } + + if (nodd_ports > neven_ports) { + k1 = nodd_ports; + k2 = neven_ports; + pt1 = PORT_S_ODD; + pt2 = PORT_S_EVEN; + out1 = out_o; + out2 = out_e; + } else { + k1= neven_ports; + pt1 = PORT_S_EVEN; + k2 = nodd_ports; + pt2 = PORT_S_ODD; + out1 = out_e; + out2 = out_o; + } + + j = 0; + for (i=0; i < k1; i++) { + rv = cnat_dynamic_port_alloc_v2 (pm, PORT_ALLOC_DIRECTED, pt1, + &my_index, &ipv4_address, &aport, + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , 0, + &(my_vrfmap->rseed_ip) + ); + + if (rv == CNAT_SUCCESS) { + printf("%s %8d %10x %8d\n", out1, + my_index, ipv4_address, aport); + } else { + printf("%s failed\n", __FUNCTION__); + return; + } + + if (j < k2) { + rv = cnat_dynamic_port_alloc_v2 (pm, PORT_ALLOC_DIRECTED, pt2, + &my_index, &ipv4_address, &aport, + cnat_static_port_range +#ifndef NO_BULK_LOGGING + , BULKSIZE_FROM_VRFMAP(my_vrfmap), + &nfv9_log_req +#endif + , 0, + &(my_vrfmap->rseed_ip) + ); + + if (rv == CNAT_SUCCESS) { + printf("%s %8d %10x %8d\n", out2, + my_index, ipv4_address, aport); + j++; + } else { + printf("%s failed\n", __FUNCTION__); + return; + return; + } + } + } +} + + +void cnat_db_create_port_cmd (int argc, unsigned long 
*argv) +{ + spp_api_cnat_port_create_t *mp; + + if (argc != 4) { + printf("no proper input defined\n"); + return; + } + + if ((argv[0] == 0) && (argv[1] == 0) && (argv[2] == 0)) { + printf("no port# defined\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_PORT_CREATE; + mp->nr_ports = argv[0]; + mp->nodd_ports = argv[1]; + mp->neven_ports = argv[2]; + mp->vrf = argv[3]; + + platform_send_msg(mp); +} + +void spp_api_cnat_port_delete_t_handler(spp_api_cnat_port_delete_t *mp) +{ + u32 pm_len; + cnat_vrfmap_t *my_vrfmap; + cnat_portmap_v2_t *pm = 0; + + u32 my_index, my_port; + u16 i_vrf; +#ifdef LB_PORT + u32 my_instance = 1; +#endif + + my_index = mp->pm_index; + my_port = mp->port; + i_vrf = mp->vrf; + + /* + * this command is run after db create port + */ + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + if (my_vrfmap->i_vrf == i_vrf) { + pm = my_vrfmap->portmap_list; + pm_len = vec_len(pm); + if (pm_len) { + goto found_portmaps; + } + } + })); + + printf("portmap is not created 0x%d\n", + vec_len(cnat_portmap_indices_by_vrf)); + return; + +found_portmaps: + if (my_index >= pm_len) { + printf("invalid port_index 0x%d >= 0x%d\n", + my_index, pm_len); + return; + } + +#ifdef LB_PORT + cnat_port_free_v2(pm, my_instance, + my_index, PORT_SINGLE, mp->port,cnat_static_port_range); +#else + cnat_port_free_v2(pm, my_index, PORT_SINGLE, mp->port,cnat_static_port_range); +#endif + printf("\n pm_index %d port %d is deleted\n", mp->pm_index, mp->port); +} + +void cnat_db_delete_port_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_port_clear_t *mp; + + if (argc != 3) { + printf("no proper input defined\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + + mp->_spp_msg_id = SPP_API_CNAT_PORT_DELETE; + mp->pm_index = argv[0]; + mp->port = argv[1]; + mp->vrf = argv[2]; + platform_send_msg(mp); +} + +void cnat_db_create_static_fwd_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_add_static_port_t *mp; + + if (argc != 4) { + printf("need right input\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + mp->_spp_msg_id = SPP_API_CNAT_V4_ADD_STATIC_PORT; + mp->i_vrf = spp_host_to_net_byte_order_16(argv[0]); + mp->i_ip = spp_host_to_net_byte_order_32(argv[1]); + mp->i_port = spp_host_to_net_byte_order_16(argv[2]); + mp->proto = argv[3]; + + platform_send_msg(mp); + return; +} + +void cnat_db_create_static_fwd_stby_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_add_static_port_t *mp; + + if (argc != 7) { + printf("need right input\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + mp->_spp_msg_id = SPP_API_CNAT_V4_ADD_STATIC_PORT; + mp->i_vrf = spp_host_to_net_byte_order_16(argv[0]); + mp->i_ip = spp_host_to_net_byte_order_32(argv[1]); + mp->i_port = spp_host_to_net_byte_order_16(argv[2]); + mp->proto = argv[3]; + mp->o_vrf_id = spp_host_to_net_byte_order_32(argv[4]); + mp->o_ip = spp_host_to_net_byte_order_32(argv[5]); + mp->o_port = spp_host_to_net_byte_order_16(argv[6]); + +printf("\ni_vrf %d, ip 0x%x, port %d, o_ip, port %d", mp->i_vrf, mp->i_ip, mp->i_port, mp->o_ip, mp->o_port); + + platform_send_msg(mp); + return; +} + +void cnat_db_delete_static_fwd_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_del_static_port_t *mp; + + if (argc != 3) { + printf("need right input\n"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + 
mp->_spp_msg_id = SPP_API_CNAT_V4_DEL_STATIC_PORT; + mp->i_vrf = spp_host_to_net_byte_order_16(argv[0]); + mp->i_ip = spp_host_to_net_byte_order_32(argv[1]); + mp->i_port = spp_host_to_net_byte_order_16(argv[2]); + + platform_send_msg(mp); + return; +} + +void cnat_nfv9_create_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_config_nfv9_logging_t *mp; + + if (argc < 3) { + printf("nfv9 create i_vrf ip_addr port [refresh_rate] [timeout] [mtu]"); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + mp->_spp_msg_id = SPP_API_CNAT_V4_CONFIG_NFV9_LOGGING; + mp->enable = 1; + mp->i_vrf = spp_host_to_net_byte_order_16(argv[0]); + + mp->ipv4_address = spp_host_to_net_byte_order_32(argv[1]); + mp->port = spp_host_to_net_byte_order_16(argv[2]); + + if (argc > 3) { + mp->refresh_rate = spp_host_to_net_byte_order_16(argv[3]); + mp->timeout_rate = spp_host_to_net_byte_order_16(argv[4]); + mp->path_mtu = spp_host_to_net_byte_order_16(argv[5]); + } else { + mp->refresh_rate = spp_host_to_net_byte_order_16(1000); + mp->timeout_rate = spp_host_to_net_byte_order_16(30); + mp->path_mtu = spp_host_to_net_byte_order_16(1500); + } + platform_send_msg(mp); +} + +void cnat_delete_cgn (int argc, unsigned long *argv) +{ + void *mp_send; + spp_api_cnat_del_cgn_t *mp; + u32 mp_size; + + mp_size = sizeof(spp_api_cnat_del_cgn_t); + + mp = spp_msg_api_alloc(mp_size); + memset(mp, 0, mp_size); + + mp->_spp_msg_id = SPP_API_CNAT_DEL_CGN; + + mp_send = mp; + + platform_send_msg(mp); +} + +void cnat_debug_global_all (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_debug_global_t *mp; + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + mp->_spp_msg_id = SPP_API_CNAT_V4_DEBUG_GLOBAL; + mp->debug_flag = CNAT_DEBUG_GLOBAL_ALL; + + platform_send_msg(mp); +} + +void cnat_debug_global_none (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_debug_global_t *mp; + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + + mp->_spp_msg_id = SPP_API_CNAT_V4_DEBUG_GLOBAL; + mp->debug_flag = CNAT_DEBUG_NONE; + + platform_send_msg(mp); +} + + +void cnat_bulk_cmd (int argc, unsigned long *argv) +{ + void *mp_send; + + if (argc < 1) { + printf("\nargc = %d", argc); + printf("\n1. 
bulk cmd [0=static-port, 1=bulk_vrf, 2=policy_knob]"); + return; + } + + + switch (argv[0]) { + case 0: + { + spp_api_cnat_v4_bulk_add_delete_static_port_t *mp; + spp_api_cnat_v4_add_static_port_t *mp_sp; + u32 mp_size = + sizeof(spp_api_cnat_v4_bulk_add_delete_static_port_t) + + (sizeof(spp_api_cnat_v4_add_static_port_t))*2; + + mp = spp_msg_api_alloc(mp_size); + memset(mp, 0, mp_size); + + mp->_spp_msg_id = SPP_API_CNAT_V4_BULK_ADD_DELETE_STATIC_PORT; + + mp->num_static_port_entries = spp_host_to_net_byte_order_32(3); + + mp_sp = (spp_api_cnat_v4_add_static_port_t *) &(mp->pad2); + + mp_sp->_spp_msg_id = spp_host_to_net_byte_order_16( + SPP_API_CNAT_V4_ADD_STATIC_PORT); + mp_sp->proto = 2; + mp_sp->i_vrf = spp_host_to_net_byte_order_16(0x1); + mp_sp->i_ip = spp_host_to_net_byte_order_32(0x11111111); + mp_sp->i_port = spp_host_to_net_byte_order_16(0x7777); + + mp_sp++; + + + mp_sp->_spp_msg_id = spp_host_to_net_byte_order_16( + SPP_API_CNAT_V4_ADD_STATIC_PORT); + mp_sp->proto = 1; + mp_sp->i_vrf = spp_host_to_net_byte_order_16(0x1); + mp_sp->i_ip = spp_host_to_net_byte_order_32(0x22222222); + mp_sp->i_port = spp_host_to_net_byte_order_16(0x6666); + + mp_sp++; + + + mp_sp->_spp_msg_id = spp_host_to_net_byte_order_16( + SPP_API_CNAT_V4_ADD_STATIC_PORT); + mp_sp->proto = 1; + mp_sp->i_vrf = spp_host_to_net_byte_order_16(0x1); + mp_sp->i_ip = spp_host_to_net_byte_order_32(0x33333333); + mp_sp->i_port = spp_host_to_net_byte_order_16(0x5555); + + mp_send = mp; + + } + break; + + case 1: + { + spp_api_cnat_v4_bulk_vrf_map_t *mp; + spp_api_cnat_v4_single_vrf_map_req *mp_sp; + + u32 mp_size = sizeof(spp_api_cnat_v4_bulk_vrf_map_t) + + (sizeof(spp_api_cnat_v4_single_vrf_map_req))*2; + + mp = spp_msg_api_alloc(mp_size); + memset(mp, 0, mp_size); + + mp->_spp_msg_id = SPP_API_CNAT_V4_BULK_VRF_MAP; + + mp->num_vrfmap_entries = spp_host_to_net_byte_order_32(3); + + mp_sp = (spp_api_cnat_v4_single_vrf_map_req *) + &(mp->vrf_policy_enable); + + mp_sp->i_vrf_id = spp_host_to_net_byte_order_32(0xe0000001); + mp_sp->o_vrf_id = spp_host_to_net_byte_order_32(0xe0000000); + mp_sp->i_vrf = spp_host_to_net_byte_order_16(0x1); + mp_sp->o_vrf = spp_host_to_net_byte_order_16(0x0); + mp_sp->start_addr = spp_host_to_net_byte_order_32(0x11111100); + mp_sp->end_addr = spp_host_to_net_byte_order_32(0x111111ff); + mp_sp->vrf_policy_enable = spp_host_to_net_byte_order_16(0x3); + mp_sp->tcp_mss_value = spp_host_to_net_byte_order_16(0x111); + mp_sp->vrf_nfv9_logging_ipv4_address = spp_host_to_net_byte_order_32(0x11000001); + mp_sp->vrf_nfv9_logging_udp_port = spp_host_to_net_byte_order_16(0x1001); + mp_sp->vrf_nfv9_refresh_rate = spp_host_to_net_byte_order_16(0x100); + mp_sp->vrf_nfv9_timeout_rate = spp_host_to_net_byte_order_16(0x10); + mp_sp->vrf_nfv9_path_mtu = spp_host_to_net_byte_order_16(0x100); + + mp_sp++; + + mp_sp->i_vrf_id = spp_host_to_net_byte_order_32(0xe0000002); + mp_sp->o_vrf_id = spp_host_to_net_byte_order_32(0xe0000000); + mp_sp->i_vrf = spp_host_to_net_byte_order_16(0x2); + mp_sp->o_vrf = spp_host_to_net_byte_order_16(0x0); + mp_sp->start_addr = spp_host_to_net_byte_order_32(0x22220000); + mp_sp->end_addr = spp_host_to_net_byte_order_32(0x2222ffff); + mp_sp->vrf_policy_enable = spp_host_to_net_byte_order_16(0x1); + mp_sp->tcp_mss_value = spp_host_to_net_byte_order_16(0x222); + mp_sp->vrf_nfv9_logging_ipv4_address = spp_host_to_net_byte_order_32(0x22000002); + mp_sp->vrf_nfv9_logging_udp_port = spp_host_to_net_byte_order_16(0x2002); + mp_sp->vrf_nfv9_refresh_rate = spp_host_to_net_byte_order_16(0x200); + 
mp_sp->vrf_nfv9_timeout_rate = spp_host_to_net_byte_order_16(0x20); + mp_sp->vrf_nfv9_path_mtu = spp_host_to_net_byte_order_16(0x200); + + mp_sp++; + + mp_sp->i_vrf_id = spp_host_to_net_byte_order_32(0xe0000003); + mp_sp->o_vrf_id = spp_host_to_net_byte_order_32(0xe0000007); + mp_sp->i_vrf = spp_host_to_net_byte_order_16(0x3); + mp_sp->o_vrf = spp_host_to_net_byte_order_16(0x7); + mp_sp->start_addr = spp_host_to_net_byte_order_32(0x33333000); + mp_sp->end_addr = spp_host_to_net_byte_order_32(0x33333fff); + mp_sp->vrf_policy_enable = spp_host_to_net_byte_order_16(0x1); + mp_sp->tcp_mss_value = spp_host_to_net_byte_order_16(0x333); + mp_sp->vrf_nfv9_logging_ipv4_address = spp_host_to_net_byte_order_32(0x33000003); + mp_sp->vrf_nfv9_logging_udp_port = spp_host_to_net_byte_order_16(0x3003); + mp_sp->vrf_nfv9_refresh_rate = spp_host_to_net_byte_order_16(0x300); + mp_sp->vrf_nfv9_timeout_rate = spp_host_to_net_byte_order_16(0x30); + mp_sp->vrf_nfv9_path_mtu = spp_host_to_net_byte_order_16(0x300); + + mp_send = mp; + } + break; + + case 2: + { + spp_api_cnat_v4_bulk_policy_knob_t *mp; + + u32 mp_size = + sizeof(spp_api_cnat_v4_bulk_policy_knob_t) + + (sizeof(spp_api_cnat_v4_single_vrf_map_req))*2; + + mp = spp_msg_api_alloc(mp_size); + memset(mp, 0, mp_size); + + mp->_spp_msg_id = SPP_API_CNAT_V4_BULK_POLICY_KNOB; + + mp->port_limit = spp_host_to_net_byte_order_16(345); + mp->icmp_timeout = spp_host_to_net_byte_order_16(300); + mp->udp_init_timeout = spp_host_to_net_byte_order_16(175); + mp->udp_act_timeout = spp_host_to_net_byte_order_16(133); + mp->tcp_init_timeout = spp_host_to_net_byte_order_16(222); + mp->tcp_act_timeout = spp_host_to_net_byte_order_16(2345); + + mp->nat_policy_enable = spp_host_to_net_byte_order_32(0x7); + + mp->global_nfv9_logging_ipv4_address = spp_host_to_net_byte_order_32(0x77777777); + mp->global_nfv9_logging_udp_port = spp_host_to_net_byte_order_16(0x7007); + mp->global_nfv9_refresh_rate = spp_host_to_net_byte_order_16(0x700); + mp->global_nfv9_timeout_rate = spp_host_to_net_byte_order_16(0x70); + mp->global_nfv9_path_mtu = spp_host_to_net_byte_order_16(0x700); + + mp_send = mp; + } + break; + + + default: + printf("\nargv[2] = %d", argv[2]); + printf("\n2. 
bulk cmd [0=static-port, 1=bulk_vrf, 2=policy_knob+bulk_vrf]"); + return; + + } + platform_send_msg(mp_send); +} + +void cnat_nfv9_delete_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_v4_config_nfv9_logging_t *mp; + + if (argc != 1) { + printf("nfv9 delete i_vrf "); + return; + } + + mp = spp_msg_api_alloc (sizeof (*mp)); + memset(mp, 0, sizeof (*mp)); + mp->_spp_msg_id = SPP_API_CNAT_V4_CONFIG_NFV9_LOGGING; + mp->enable = 0; + mp->i_vrf = spp_host_to_net_byte_order_16(argv[0]); + platform_send_msg(mp); +} + +void cnat_generic_cmd (int argc, unsigned long *argv) +{ + spp_api_cnat_generic_command_request_t *mp; + + if (argc != 9) { + printf("generic command core type p1 p2 p3 p4 p5 p6 p7 "); + return; + } + + /* + * Allocate a large buffer for message req and resp structure + */ + mp = spp_msg_api_alloc (MAX_DEBUG_BUFFER_SIZE); + memset(mp, 0, MAX_DEBUG_BUFFER_SIZE); + mp->_spp_msg_id = SPP_API_CNAT_GENERIC_COMMAND_REQUEST; + mp->core_num = argv[0]; + mp->params[0] = spp_host_to_net_byte_order_32(argv[1]); + mp->params[1] = spp_host_to_net_byte_order_32(argv[2]); + mp->params[2] = spp_host_to_net_byte_order_32(argv[3]); + mp->params[3] = spp_host_to_net_byte_order_32(argv[4]); + mp->params[4] = spp_host_to_net_byte_order_32(argv[5]); + mp->params[5] = spp_host_to_net_byte_order_32(argv[6]); + mp->params[6] = spp_host_to_net_byte_order_32(argv[7]); + mp->params[7] = spp_host_to_net_byte_order_32(argv[8]); + platform_send_msg(mp); +} + +u32 icmp_sent_timestamps; /* 32 KB array per core */ +u8 v4_pkt_count = 0; + +cnat_icmp_msg_t icmp_msg_gen_allowed () +{ +#ifdef DISABLE_ICMP_THROTTLE_FOR_DEBUG_PURPOSE + return CNAT_ICMP_MSG; +#else + u32 current_timestamp; + spp_node_main_vector_t *nmv; + u32 updated_timestamp; + + v4_pkt_count ++; + + nmv = spp_get_node_main_vectorized_inline(); + + current_timestamp = nmv->ticks / nmv->ticks_per_second; + + PLATFORM_UPDATE_TIMESTAMP + if (PREDICT_FALSE(icmp_sent_timestamps != updated_timestamp)) { + v4_pkt_count = 1; + /* update timestamp */ + icmp_sent_timestamps = updated_timestamp; + } + if (PREDICT_TRUE(v4_pkt_count <= cnat_main_db_icmp_rate_limit_core)) { + return CNAT_ICMP_MSG; + } else { + return CNAT_NO_ICMP_MSG; + } +#endif +} + +u32 v6_icmp_sent_timestamps; /* 32 KB array per core */ +u8 v6_pkt_count = 0; + +cnat_icmp_msg_t v6_icmp_msg_gen_allowed () +{ +#ifdef DISABLE_ICMP_THROTTLE_FOR_DEBUG_PURPOSE + return CNAT_ICMP_MSG; +#else + u32 current_timestamp; + spp_node_main_vector_t *nmv; + u32 updated_timestamp; + + nmv = spp_get_node_main_vectorized_inline(); + + current_timestamp = nmv->ticks / nmv->ticks_per_second; + PLATFORM_UPDATE_TIMESTAMP + v6_pkt_count ++; + + if (PREDICT_FALSE(v6_icmp_sent_timestamps != updated_timestamp)) { + v6_pkt_count = 1; + /* update timestamp */ + v6_icmp_sent_timestamps = updated_timestamp; + } + if (PREDICT_TRUE(v6_pkt_count <= cnat_main_db_icmp_rate_limit_core)) { + return CNAT_ICMP_MSG; + } else { + return CNAT_NO_ICMP_MSG; + } +#endif +} + +u32 v4_udp_crc_zero_timestamps; +u32 v4_udp_crc_zero_pkt_count = 0; +int v4_crc_zero_udp_allowed () +{ + PLATFORM_V4_CRC_ZERO_UDP_ALLOWED + /* Currently not supported for Brahmos. 
we need to take care of this */ + spp_node_main_vector_t *nmv; + u32 hash_value, current_timestamp; + + nmv = spp_get_node_main_vectorized_inline(); + + current_timestamp = nmv->ticks / nmv->ticks_per_second; + v4_udp_crc_zero_pkt_count++; + if (PREDICT_FALSE(v4_udp_crc_zero_timestamps != current_timestamp)) { + v4_udp_crc_zero_pkt_count = 1; + v4_udp_crc_zero_timestamps = current_timestamp; + } + if (PREDICT_TRUE(v4_udp_crc_zero_pkt_count <= + crc_zero_udp_rate_limit_core)) { + return 1; + } else { + return 0; + } +} + +/* + * ipv4_decr_ttl_n_calc_csum() + * - It decrements the TTL and calculates the incremental IPv4 checksum + */ + +ALWAYS_INLINE( +void ipv4_decr_ttl_n_calc_csum(ipv4_header *ipv4)) +{ + u32 checksum; + u16 old; + + old = ntohs(*(u16 *)&ipv4->ttl); + + /* Decrement TTL */ + ipv4->ttl--; + + /* Calculate incremental checksum */ + checksum = old + (~ntohs(*(u16 *)&ipv4->ttl) & 0xFFFF); + checksum += ntohs(ipv4->checksum); + checksum = (checksum & 0xFFFF) + (checksum >> 16); + ipv4->checksum = htons(checksum + (checksum >> 16)); +} + +ALWAYS_INLINE( +void calc_ipv4_checksum (ipv4_header *ipv4)) +{ + u16 *data = (u16 *) ipv4; + u32 checksum = 0; + + checksum = crc_calc(ipv4); + + /* Now produce the 1's complement */ + ipv4->checksum = spp_host_to_net_byte_order_16(((u16) (~(u16)checksum))); +} + +ALWAYS_INLINE( +void calc_v4_icmp_checksum (icmp_v4_t *icmp, int ipv4_payload_size)) +{ + u16 *data = (u16 *) icmp; + int num_hwords = (ipv4_payload_size)/2; + u32 checksum = 0; + + icmp->checksum = 0; + if (PREDICT_FALSE((ipv4_payload_size%2) != 0)) { + num_hwords += 1; + /* Append 0's in the last octet */ + *((u8 *)data + ipv4_payload_size) = 0; + } + while (num_hwords) { + checksum += (u32)spp_net_to_host_byte_order_16(data++); + num_hwords--; + } + + /* Add in the carry of the original sum */ + checksum = (checksum & 0xFFFF) + (checksum >> 16); + /* Add in the carry of the final sum */ + checksum = (checksum & 0xFFFF) + (checksum >> 16); + /* Now produce the 1's complement */ + icmp->checksum = spp_host_to_net_byte_order_16(((u16) (~(u16)checksum))); +} + +ALWAYS_INLINE( +void calc_v6_icmp_checksum (ipv6_header_t *ipv6, u16 ip_payload_size)) +{ + u16 *data; + u16 *data1; + int i; + icmp_v6_t *icmp; + int num_hwords = (ip_payload_size)/2; + u32 checksum = 0; + pseudo_v6_header_t pseudo_header; + + icmp = (icmp_v6_t *) ((u8 *)ipv6 + IPV6_HDR_LEN); + data = (u16 *) icmp; + icmp->checksum = 0; + +#if 1 + if (PREDICT_FALSE((ip_payload_size%2) != 0)) { + num_hwords += 1; + /* Append 0's in the last octet */ + *((u8 *)data + ip_payload_size) = 0; + } +#endif + + /* construct the pseudo header */ + + pseudo_header.src_addr[0] = ipv6->src_addr[0]; + pseudo_header.src_addr[1] = ipv6->src_addr[1]; + pseudo_header.src_addr[2] = ipv6->src_addr[2]; + pseudo_header.src_addr[3] = ipv6->src_addr[3]; + pseudo_header.dst_addr[0] = ipv6->dst_addr[0]; + pseudo_header.dst_addr[1] = ipv6->dst_addr[1]; + pseudo_header.dst_addr[2] = ipv6->dst_addr[2]; + pseudo_header.dst_addr[3] = ipv6->dst_addr[3]; + pseudo_header.payload_length = spp_host_to_net_byte_order_16(ip_payload_size); + pseudo_header.next_header = spp_host_to_net_byte_order_16(ipv6->next_header); + + data1 = (u16 *) &pseudo_header; + + /* sizeof(pseudo_v6_header_t) = 36 */ + for (i = 0; i < 18; i++) { + checksum += (u32)spp_net_to_host_byte_order_16(data1++); + } + +checksum_calc: + + if (PREDICT_TRUE(num_hwords)) { + checksum += (u32)spp_net_to_host_byte_order_16(data); + num_hwords--; + data++; + goto checksum_calc; + } + + /* Add in the 
carry of the original sum */ + checksum = (checksum & 0xFFFF) + (checksum >> 16); + /* Add in the carry of the final sum */ + checksum = (checksum & 0xFFFF) + (checksum >> 16); + /* Now produce the 1's complement */ + icmp->checksum = spp_host_to_net_byte_order_16(((u16) (~(u16)checksum))); +} + +void icmp_error_generate_v6 (spp_ctx_t *ctx, u8 icmp_type, + u8 icmp_code, u16 uidb_index) { + + u16 ip_hdr_len, ip_payload_size; + u32 *src_p, * dst_p; + icmp_v6_t *icmp; + int i; + ipv6_header_t *ip_old, *ip_new; + u16 icmp_payload_len; + + /* + * As per RFC 2463, we limit the maximum size of generated ICMPv6 message to * 1280. And hence if the packet is bigger than 1280, then it needs to be + * truncated. Also, if the packet had multiple chained buffers, we need to + * free all chained buffers, except the first one. + */ + free_all_but_first_chained_buffers(ctx); + + ip_hdr_len = IPV6_HDR_LEN; + /* offset to ip payload */ + + ip_old = (ipv6_header_t *)PLATFORM_CTX_CURRENT_HDR; + ip_new = (ipv6_header_t *) ((u8 *) PLATFORM_CTX_CURRENT_HDR - ICMPV6_ERR_SIZE); + icmp = (icmp_v6_t*) ( (u8*)ip_new + ip_hdr_len); + + icmp_payload_len = ip_hdr_len + + spp_net_to_host_byte_order_16(&(ip_old->payload_length)) ; + + ip_payload_size = ICMPV6_HDR_SIZE + icmp_payload_len; + /* + * There is no easy way to predict this case as the probablity that the IPv6 + * pkt is big depends on the type of traffic. Let us optimize the big + * pkt case as it involves more processing + * + * If the pkt size exceeds IPV6_MIN_PATH_MTU truncate it to IPV6_MIN_PATH_MTU + */ + if (PREDICT_TRUE((ip_payload_size + ip_hdr_len) > IPV6_MIN_PATH_MTU)) { + ip_payload_size = IPV6_MIN_PATH_MTU - ip_hdr_len; + } + + /* Following ICMP op has to be after ip header being copied */ + icmp->type = icmp_type; + icmp->code = icmp_code; + + ip_new->version_trafficclass_flowlabel = spp_host_to_net_byte_order_32( + VERSION_TRAFFICCLASS_FLOWLABEL); + ip_new->payload_length = spp_host_to_net_byte_order_16(ip_payload_size); + ip_new->next_header = IPV6_PROTO_ICMPV6; + ip_new->hop_limit = 64; + ip_new->dst_addr[0] = ip_old->src_addr[0]; + ip_new->dst_addr[1] = ip_old->src_addr[1]; + ip_new->dst_addr[2] = ip_old->src_addr[2]; + ip_new->dst_addr[3] = ip_old->src_addr[3]; + + ip_new->src_addr[0] = + spp_host_to_net_byte_order_32(svi_params_array[uidb_index].ipv6_addr[0]); + ip_new->src_addr[1] = + spp_host_to_net_byte_order_32(svi_params_array[uidb_index].ipv6_addr[1]); + ip_new->src_addr[2] = + spp_host_to_net_byte_order_32(svi_params_array[uidb_index].ipv6_addr[2]); + ip_new->src_addr[3] = + spp_host_to_net_byte_order_32(svi_params_array[uidb_index].ipv6_addr[3]); + /* calc checksum for icmp */ + + calc_v6_icmp_checksum(ip_new, ip_payload_size); +#if 0 + printf("Flow = 0x%x\n", ip_new->version_trafficclass_flowlabel); + printf("Hoplimit = 0x%x\n", ip_new->hop_limit); + printf("Length= 0x%x\n", ip_new->payload_length); + printf("Next header = 0x%x\n", ip_new->next_header); + printf("Src add0 = 0x%x\n", ip_new->src_addr[0]); + printf("Src add1 = 0x%x\n", ip_new->src_addr[1]); + printf("Src add2 = 0x%x\n", ip_new->src_addr[2]); + printf("Src add3 = 0x%x\n", ip_new->src_addr[3]); + printf("Dst add0 = 0x%x\n", ip_new->dst_addr[0]); + printf("Dst add1 = 0x%x\n", ip_new->dst_addr[1]); + printf("Dst add2 = 0x%x\n", ip_new->dst_addr[2]); + printf("Dst add3 = 0x%x\n", ip_new->dst_addr[3]); + printf("Icmp type = 0x%x\n", icmp->type); + printf("Icmp code = 0x%x\n", icmp->code); + + printf("\n\nICMP packet:\n"); + for (i = 0; i < 10; i ++) { + printf("0x%x " , *((u8 
*)icmp + i)); + if ((i%16) == 15) { + printf("\n"); + } + } +#endif + + ctx->current_header -= ICMPV6_ERR_SIZE; + ctx->current_length = ip_payload_size + ip_hdr_len; + PLATFORM_CNAT_SET_TX_VRF(ctx,uidb_index); +} + +void icmp_error_generate_v2 (ipv4_header *ip, u8 icmp_type, + u8 icmp_code, u16 mtu, u32 src_ip) +{ + + u16 ip_hdr_len, ip_payload_size; + u32 *src_p, * dst_p; + icmp_v4_t *icmp; + + ip_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; /* offset to ip payload */ + icmp = (icmp_v4_t*) ( (u8*)ip + ip_hdr_len); + ip_payload_size = sizeof(icmp_v4_t) + ip_hdr_len + + ICMP_UNREACHABLE_IP_PAYLOAD_SIZE; + + src_p = (u32*) + ((u8*)ip + ip_hdr_len + ICMP_UNREACHABLE_IP_PAYLOAD_SIZE - 4); + dst_p = (u32*) ((u8*)src_p + sizeof(ipv4_header) + + sizeof(icmp_v4_t)); + + while(src_p >= (u32*)ip) *dst_p-- = *src_p--; + + /* Following ICMP op has to be after ip header being copied */ + icmp->type = icmp_type; + icmp->code = icmp_code; + icmp->identifier = 0; + icmp->sequence = 0; + if(PREDICT_FALSE(mtu != 0)) { + icmp->sequence = spp_host_to_net_byte_order_16(mtu); + } + + + /* build icmp header, keep original tos, identification values */ + ip->version_hdr_len_words = 0x45; + ip->total_len_bytes = sizeof(ipv4_header) + ip_payload_size; + ip->total_len_bytes = spp_host_to_net_byte_order_16(ip->total_len_bytes); + ip->frag_flags_offset = 0; + ip->ttl = 64; + ip->protocol = ICMP_PROT; + ip->checksum = 0; + ip->dest_addr = ip->src_addr; + ip->src_addr = spp_host_to_net_byte_order_32(src_ip); + + /* calc checksum for ip and icmp */ + + calc_ipv4_checksum(ip); + calc_v4_icmp_checksum( (icmp_v4_t *) ((u8*) ip + sizeof(ipv4_header)), + ip_payload_size); +} + +void icmp_error_generate (ipv4_header *ip, u8 icmp_type, + u8 icmp_code, u16 uidb_index) { + + u16 ip_hdr_len, ip_payload_size; + u32 *src_p, * dst_p; + icmp_v4_t *icmp; + + ip_hdr_len = (ip->version_hdr_len_words & 0xf) << 2; /* offset to ip payload */ + icmp = (icmp_v4_t*) ( (u8*)ip + ip_hdr_len); + ip_payload_size = sizeof(icmp_v4_t) + ip_hdr_len + + ICMP_UNREACHABLE_IP_PAYLOAD_SIZE; + + src_p = (u32*) + ((u8*)ip + ip_hdr_len + ICMP_UNREACHABLE_IP_PAYLOAD_SIZE - 4); + dst_p = (u32*) ((u8*)src_p + sizeof(ipv4_header) + + sizeof(icmp_v4_t)); + + while(src_p >= (u32*)ip) *dst_p-- = *src_p--; + + /* Following ICMP op has to be after ip header being copied */ + icmp->type = icmp_type; + icmp->code = icmp_code; + icmp->identifier = 0; + icmp->sequence = 0; + + + /* build icmp header, keep original tos, identification values */ + ip->version_hdr_len_words = 0x45; + ip->total_len_bytes = sizeof(ipv4_header) + ip_payload_size; + ip->total_len_bytes = spp_host_to_net_byte_order_16(ip->total_len_bytes); + ip->frag_flags_offset = 0; + ip->ttl = 64; + ip->protocol = ICMP_PROT; + ip->checksum = 0; + ip->dest_addr = ip->src_addr; + + ip->src_addr = spp_host_to_net_byte_order_32(svi_params_array[uidb_index].ipv4_addr); + + /* calc checksum for ip and icmp */ + + calc_ipv4_checksum(ip); + calc_v4_icmp_checksum( (icmp_v4_t *) ((u8*) ip + sizeof(ipv4_header)), + ip_payload_size); +#if 0 + printf("version_hdr_len_words = 0x%x\n", ip->version_hdr_len_words); + printf("total_len_bytes = 0x%x\n", ip->total_len_bytes); + printf("Frag = 0x%x\n", ip->frag_flags_offset); + printf("ttl = 0x%x\n", ip->ttl); + printf("Protocol = 0x%x\n", ip->protocol); + printf("checksum = 0x%x\n", ip->checksum); + printf("Dest addr = 0x%x\n", ip->dest_addr); + printf("Src addr = 0x%x\n", ip->src_addr); + printf("Icmp type = 0x%x\n", icmp->type); + printf("Icmp code = 0x%x\n", icmp->code); 
+#endif + +} + +int icmpv4_generate_with_throttling_v2 (spp_ctx_t *ctx, ipv4_header *ipv4, + int icmp_type, int icmp_code, + u16 mtu, u32 src_ip) +{ + u16 ip_hdr_len; + icmp_v4_t *icmp; + u16 rx_uidb_index = ctx->ru.rx.uidb_index; + if (icmp_msg_gen_allowed()) { + free_all_but_first_chained_buffers(ctx); + icmp_error_generate_v2(ipv4, icmp_type, icmp_code, mtu, src_ip); + ctx->current_length = (u16) + ((u8*)ctx->current_header - ctx->packet_data) + + spp_net_to_host_byte_order_16(&ipv4->total_len_bytes); + PLATFORM_CNAT_SET_TX_VRF(ctx,rx_uidb_index); + return 1; + } else { + return 0; + } +} + +int icmpv4_generate_with_throttling (spp_ctx_t *ctx, ipv4_header *ipv4, + u16 rx_uidb_index) +{ + int icmp_type; + int icmp_code; + + if (icmp_msg_gen_allowed()) { + /* ICMP error would be small, so one buffer is enough. Clear the other */ + free_all_but_first_chained_buffers(ctx); + + icmp_type = ICMPV4_TIMEEXCEEDED; + icmp_code = ICMPV4_TIMTTL; + icmp_error_generate(ipv4, icmp_type, icmp_code, rx_uidb_index); + ctx->current_length = (u16) + ((u8*)ctx->current_header - ctx->packet_data) + + spp_net_to_host_byte_order_16(&ipv4->total_len_bytes); + PLATFORM_CNAT_SET_TX_VRF(ctx,rx_uidb_index); + return 1; + } else { + return 0; + } +} + +int icmpv4_generate_with_throttling_v1 (spp_ctx_t *ctx, ipv4_header *ipv4, + u16 rx_uidb_index, u32 type, u32 code) +{ + if (icmp_msg_gen_allowed()) { + /* ICMP error would be small, so one buffer is enough. Clear the other */ + free_all_but_first_chained_buffers(ctx); + + icmp_error_generate(ipv4, type, code, rx_uidb_index); + ctx->current_length = (u16) + ((u8*)ctx->current_header - ctx->packet_data) + + spp_net_to_host_byte_order_16(&ipv4->total_len_bytes); + PLATFORM_CNAT_SET_TX_VRF(ctx,rx_uidb_index); + return 1; + } else { + return 0; + } +} + + +int icmpv6_generate_with_throttling (spp_ctx_t *ctx, ipv6_header_t *ipv6, + u16 rx_uidb_index) +{ + int icmp_type; + int icmp_code; + + if (v6_icmp_msg_gen_allowed()) { + icmp_type = ICMPV6_TIMEEXCEEDED; + icmp_code = ICMPV6_TIMTTL; + icmp_error_generate_v6(ctx, icmp_type, icmp_code, rx_uidb_index); + return 1; + } else { + return 0; + } +} + +int icmpv6_generate_with_throttling_v1 (spp_ctx_t *ctx, ipv6_header_t *ipv6, + u16 rx_uidb_index, u32 type, u32 code) +{ + + if (v6_icmp_msg_gen_allowed()) { + icmp_error_generate_v6(ctx, type, code, rx_uidb_index); + return 1; + } else { + return 0; + } +} +#endif + +inline +void calculate_window_scale(tcp_hdr_type *tcp_header, u8 *scale) { + + u8 check_options = 0; + + *scale = 0; + check_options = ((tcp_header->flags & TCP_FLAG_SYN) && + (((tcp_header->hdr_len>>4) << 2) > sizeof(tcp_hdr_type))); + + if (PREDICT_FALSE(check_options)) { + u8 *options_ptr = tcp_findoption(tcp_header, TCP_OPTION_WINDOW_SCALE); + + /* + * TCP option field: | kind 1B | len 1B | value 2B| + * where kind != [0, 1] + */ + if (PREDICT_TRUE(options_ptr && + (options_ptr[1] == TCP_OPTION_WINDOW_SCALE))) { + u8 *ptr = (u8*)(options_ptr + 2); + *scale = *ptr; + + if(PREDICT_FALSE(*scale >= 14)) { + *scale = 14; + } + + return; + } + } +} + +#if 0 +ALWAYS_INLINE( +void cnat_log_nat44_tcp_seq_mismatch( + cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap)) +{ + /* As of now, Netflow does not require this to be logged + * So only syslog + */ + if(PREDICT_TRUE(db->flags & CNAT_TAC_SEQ_MISMATCH)) { + /* Already logged ..*/ + return; + } + /* else, set the flag and call the log API */ + + db->flags = db->flags | CNAT_TAC_SEQ_MISMATCH; + + cnat_syslog_nat44_tcp_seq_mismatch(db, vrfmap); +} + + +static int 
cnat_util_init (void *notused) +{ + /* run SPP_API_CNAT_PORTMAP_CREATE first*/ + spp_msg_api_set_handler(SPP_API_CNAT_PORT_ALLOCATE, + spp_api_cnat_port_allocate_t_handler); + + + spp_msg_api_set_handler(SPP_API_CNAT_PORT_CLEAR, + spp_api_cnat_port_clear_t_handler); + + /* run vrfmap config first */ + spp_msg_api_set_handler(SPP_API_CNAT_PORT_CREATE, + spp_api_cnat_port_create_t_handler); + + spp_msg_api_set_handler(SPP_API_CNAT_PORT_DELETE, + spp_api_cnat_port_delete_t_handler); + return 0; +} + +void +print_ipv6_pkt (ipv6_header_t *ip) +{ + u32 i, total_len, l4_len=0; + + u8 *pkt = (u8 *) ip; + + total_len = spp_net_to_host_byte_order_16(&ip->payload_length); + + /* we rarely need to debug > 200 bytes of packet */ + if(total_len > 200) { + total_len = 200; + } + + printf("\n======== PRINTING PKT START======\n"); + printf("======== IPv6 PAYLOAD LEN %d ===========\n", total_len); + for (i=0; i < 40; i++) { + printf(" %02X ", *(pkt + i)); + if(i%16==15) + printf("\n"); + } + + if (ip->next_header == IPV6_PROTO_TCP) { + printf("\n======== TCP HEADER =================\n"); + l4_len = 20; + } + else if (ip->next_header == IPV6_PROTO_UDP) { + printf("\n======== UDP HEADER =================\n"); + l4_len = 8; + } + else if (ip->next_header == IPV6_PROTO_ICMPV6) { + printf("\n======== ICMP HEADER =================\n"); + l4_len = 8; + } + + for (i=40; i < (l4_len + 40); i++) { + printf(" %02X ", *(pkt + i)); + } + + printf("\n======== LAYER4 PAYLOAD ===================\n"); + for (i=(l4_len + 40); i < total_len; i++) { + printf(" %02X ", *(pkt + i)); + if(i%16==15) + printf("\n"); + } + + printf("\n======== PRINTING PKT END =======\n"); +} + + + +PLATFORM_SPP_INIT_FUNCTION(cnat_util_init); +#endif diff --git a/vnet/vnet/vcgn/cnat_v4_ftp_alg.h b/vnet/vnet/vcgn/cnat_v4_ftp_alg.h new file mode 100644 index 00000000000..df3dfcb0797 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_v4_ftp_alg.h @@ -0,0 +1,133 @@ +/* + *------------------------------------------------------------------ + * cnat_v4_ftp_alg.h + * + * Copyright (c) 2012-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_V4_FTP_ALG_H__ +#define __CNAT_V4_FTP_ALG_H__ + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#include "tcp_header_definitions.h" +#include "dslite_defs.h" +#include "dslite_db.h" + +/* shorter form of byte order functions */ + +#define net2host16(x) clib_net_to_host_u16( x) +#define net2host32(x) clib_net_to_host_u32( x) +#define net2host64(x) clib_net_to_host_u64( x) +#define host2net16(x) clib_host_to_net_u16(x) +#define host2net32(x) clib_host_to_net_u32(x) +#define host2net64(x) clib_host_to_net_u64(x) + +//#define BIGENDIAN + +typedef struct iphdrtype_ { + u8 v_ihl; /* version and IP header length */ + u8 tos; /* type of service */ + u16 tl; /* total length */ + u16 id; /* identifier */ + u16 ipreserved: 1; + u16 dontfragment: 1; + u16 morefragments: 1; + u16 fo: 13; /* fragment offset */ + u8 ttl; /* time to live */ + u8 prot; /* protocol type */ + u16 checksum; /* checksum */ + u32 srcadr; /* IP source address */ + u32 dstadr; /* IP destination address */ +} iphdrtype; + + +typedef struct tcptype_ { + u16 sourceport; + u16 destinationport; + u32 sequencenumber; + u32 acknowledgementnumber; + u8 dataoffset; + u8 flags; +#if 0 +/* bypass the ENDIAN part */ +#ifdef BIGENDIAN + u8 reserved: 2; + u8 urg: 1; + u8 ack: 1; + u8 psh: 1; + u8 rst: 1; + u8 syn: 1; + u8 fin: 1; +#else + u8 fin: 1; + u8 syn: 1; + u8 rst: 1; + u8 psh: 1; + u8 ack: 1; + u8 urg: 1; + u8 reserved2: 2; +#endif +#endif + + u16 window; + u16 checksum; + u16 urgentpointer; + u8 data[0]; +} tcptype ; + + +int watch_ftp_port_cmd (iphdrtype *ip, + tcptype *tcp, + u32 * ip_addr, + u16 * port); + + +u8 * ftp_test_pkt_gen (u32 ip_addr, u16 port); + +int update_ftp_port(u8 * pkt, u32 new_ip, u16 new_port, i8 * delta, + cnat_main_db_entry_t *db_tcp_control, + dslite_table_entry_t *dslite_entry_ptr, + ipv6_header_t *ipv6_hdr); +/* + * caller needs to check if it's a ftp packet + * this function returns 1 + * if packet being updated for PORT + * otherwise return 0. + * Assume IP header DOES NOT have option fields + */ + +int cnat_ftp_alg ( u8* pkt, i8 * delta, cnat_main_db_entry_t *db, + dslite_table_entry_t *dslite_entry_ptr, + ipv6_header_t *ipv6_hdr); + +#define FTP_ALG_DEBUG_PRINTF_ENABLED 1 + +#ifdef FTP_ALG_DEBUG_PRINTF_ENABLED + +#define FTP_ALG_DEBUG_PRINTF(...) { \ + if (global_debug_flag & CNAT_DEBUG_FTP_ALG) { \ + printf(__VA_ARGS__); \ + } } + +#else + +#define FTP_ALG_DEBUG_PRINTF(...) + +#endif + +#endif /* __CNAT_V4_FTP_ALG_H__ */ diff --git a/vnet/vnet/vcgn/cnat_v4_functions.c b/vnet/vnet/vcgn/cnat_v4_functions.c new file mode 100644 index 00000000000..d3051fba5a7 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_v4_functions.c @@ -0,0 +1,364 @@ +/* + *--------------------------------------------------------------------------- + * cnat_v4_funtions.c + * + * Copyright (c) 2008-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *--------------------------------------------------------------------------- + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + + +#include "tcp_header_definitions.h" +#include "cnat_db.h" +#include "cnat_config.h" +#include "cnat_v4_functions.h" +#include "dslite_defs.h" +#include "dslite_db.h" + +static u32 tcp_logging_count; +static u32 tcp_logging_overflow; + +static tcp_logging_struct_t tcp_logging_array[MAX_TCP_LOGGING_COUNT]; + +/* + * Function to log TCP pkts checksum changes.. + */ +void +tcp_debug_logging ( + u32 seq_num, + u32 ack_num, + u32 old_ip, + u32 new_ip, + u16 old_port, + u16 new_port, + u16 old_ip_crc, + u16 new_ip_crc, + u16 old_tcp_crc, + u16 new_tcp_crc) +{ + tcp_logging_array[tcp_logging_count].seq_num = seq_num; + tcp_logging_array[tcp_logging_count].ack_num = ack_num; + tcp_logging_array[tcp_logging_count].old_ip = old_ip; + tcp_logging_array[tcp_logging_count].new_ip = new_ip; + tcp_logging_array[tcp_logging_count].old_port = old_port; + tcp_logging_array[tcp_logging_count].new_port = new_port; + tcp_logging_array[tcp_logging_count].old_ip_crc = old_ip_crc; + tcp_logging_array[tcp_logging_count].new_ip_crc = new_ip_crc; + tcp_logging_array[tcp_logging_count].old_tcp_crc = old_tcp_crc; + tcp_logging_array[tcp_logging_count].new_tcp_crc = new_tcp_crc; + + tcp_logging_count++; + + if (tcp_logging_count >= MAX_TCP_LOGGING_COUNT) { + tcp_logging_overflow = 1; + tcp_logging_count = 0; + } +} + +/* + * Function to dmp TCP pkts logged.. + */ +void +tcp_debug_logging_dump (void) +{ + u32 i, total_count, start_entry; + + if (tcp_logging_overflow) { + total_count = MAX_TCP_LOGGING_COUNT; + start_entry = tcp_logging_count; + printf("Logging Entries Wrapped Around, displaying %d entries\n", + total_count); + } else { + total_count = tcp_logging_count; + start_entry = 0; + printf("Displaying %d entries\n", total_count); + } + + printf("SEQ ACK IP_O IP_N PORT_O PORT_N L3_CRC_O L3_CRC_N L4_CRC_O L4_CRC_N\n"); + + for (i = 0; i < total_count; i++) { + u32 entry = (i + start_entry) % MAX_TCP_LOGGING_COUNT; + + printf("%04d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n", + entry, + tcp_logging_array[entry].seq_num, + tcp_logging_array[entry].ack_num, + tcp_logging_array[entry].old_ip, + tcp_logging_array[entry].new_ip, + tcp_logging_array[entry].old_port, + tcp_logging_array[entry].new_port, + tcp_logging_array[entry].old_ip_crc, + tcp_logging_array[entry].new_ip_crc, + tcp_logging_array[entry].old_tcp_crc, + tcp_logging_array[entry].new_tcp_crc); + } +} + +/* + * Function to enable TCP logging + */ +void +tcp_debug_logging_enable_disable (u32 enable_flag) +{ + switch (enable_flag) { + + case TCP_LOGGING_DISABLE: + if (tcp_logging_enable_flag == TCP_LOGGING_DISABLE) { + printf("\nTCP Logging ALREADY DISABLED\n"); + } else { + printf("\nTCP Logging DISABLED\n"); + } + tcp_logging_enable_flag = 0; + break; + + case TCP_LOGGING_ENABLE: + if (tcp_logging_enable_flag == TCP_LOGGING_ENABLE) { + printf("\nTCP Logging ALREADY ENABLED\n"); + } else { + tcp_logging_enable_flag = 1; + tcp_logging_count = 0; + tcp_logging_overflow = 0; + + printf("\nTCP Logging ENABLED\n"); + } + break; + + case TCP_LOGGING_PACKET_DUMP: + tcp_debug_logging_dump(); + break; + + case TCP_LOGGING_SUMMARY_DUMP: + default: + printf("\ntcp_logging_enable_flag %d, tcp_log_count %d\n", + tcp_logging_enable_flag, tcp_logging_count); + printf("To Enable TCP LOGGING provide a flag value of %d\n", + TCP_LOGGING_ENABLE); + break; + } +} 
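/*
 * For reference, a minimal self-contained sketch of the incremental
 * one's-complement checksum update (newsum = ~(~oldsum + ~old + new),
 * cf. RFC 1624) on which the NAT and MSS-adjust code below relies via
 * FILL_CHECKSUM and the CNAT_UPDATE_* macros.  This helper is purely
 * illustrative, is not referenced by the code in this file, and assumes
 * all values are in host byte order.
 */
static inline u16
incremental_csum_update16 (u16 old_csum, u16 old_val, u16 new_val)
{
    /* sum the one's complements of the old checksum and the old field,
     * plus the new field value, in 32 bits so the carries are kept */
    u32 sum = (u16) ~old_csum + (u16) ~old_val + new_val;

    /* fold the carries back in twice, as the FILL_CHECKSUM macro does */
    sum = (sum & 0xffff) + (sum >> 16);
    sum = (sum & 0xffff) + (sum >> 16);

    /* final one's complement gives the updated checksum */
    return (u16) ~sum;
}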
+ +void hex_dump (u8 * p, int len) { + int i; + for (i=0;i<len;i++) { + if(i && (i & 0x3 ) == 0) printf(" "); + if(i && (i & 0xf ) == 0) printf("\n"); + PLATFORM_DEBUG_PRINT("%02X ", p[i]); + } + PLATFORM_DEBUG_PRINT("\n"); +} + +void +print_icmp_pkt (ipv4_header *ip) +{ + u32 i, total_len; + + u8 *pkt = (u8 *) ip; + + total_len = clib_net_to_host_u16(ip->total_len_bytes); + + printf("\n======== PRINTING PKT START======\n"); + printf("======== IP PACKET LEN %d ===========\n", total_len); + for (i=0; i < 20; i++) { + printf(" %02X ", *(pkt + i)); + } + + printf("\n======== ICMP HEADER =================\n"); + for (i=20; i < 28; i++) { + printf(" %02X ", *(pkt + i)); + } + + printf("\n======== ICMP BODY ===================\n"); + for (i=28; i < total_len; i++) { + printf(" %02X ", *(pkt + i)); + } + + printf("\n======== PRINTING PKT END =======\n"); +} + +void +print_udp_pkt (ipv4_header *ip) +{ + u32 i, total_len, udp_len; + + u8 *pkt = (u8 *) ip; + + total_len = clib_net_to_host_u16(ip->total_len_bytes); + udp_len = total_len - 20; + + printf("\n======== PRINTING PKT START======\n"); + printf("======== IP PACKET LEN %d ===========\n", total_len); + for (i=0; i < 20; i++) { + printf(" %02X ", *(pkt + i)); + } + printf("\n======== UDP PSEUDO HEADER ==========\n"); + for (i=12; i < 20; i++) { + printf(" %02X ", *(pkt + i)); + } + printf(" 00 11 %02X %02X ", udp_len >> 8, udp_len & 0xff); + + printf("\n======== UDP HEADER =================\n"); + for (i=20; i < 28; i++) { + printf(" %02X ", *(pkt + i)); + } + printf("\n======== UDP BODY ===================\n"); + for (i=28; i < total_len; i++) { + printf(" %02X ", *(pkt + i)); + } + + printf("\n======== PRINTING PKT END =======\n"); +} + +void +print_tcp_pkt (ipv4_header *ip) +{ + u32 i, total_len, tcp_len; + + u8 *pkt = (u8 *) ip; + + total_len = clib_net_to_host_u16(ip->total_len_bytes); + tcp_len = total_len - 20; + + printf("\n======== PRINTING PKT START======\n"); + printf("======== IP PACKET LEN %d ===========\n", total_len); + for (i=0; i < 20; i++) { + printf(" %02X ", *(pkt + i)); + } + printf("\n======== TCP PSEUDO HEADER ==========\n"); + for (i=12; i < 20; i++) { + printf(" %02X ", *(pkt + i)); + } + printf(" 00 06 %02X %02X ", tcp_len >> 8, tcp_len & 0xff); + + printf("\n======== TCP HEADER =================\n"); + for (i=20; i < 40; i++) { + printf(" %02X ", *(pkt + i)); + } + printf("\n======== TCP BODY ===================\n"); + for (i=40; i < total_len; i++) { + printf(" %02X ", *(pkt + i)); + } + + printf("\n======== PRINTING PKT END =======\n"); +} + +/* IN: ipv4 and tcp header pointer, + * new ipv4 addr and port value + * main db index for accessing per vrf mss value + * DO: + * NAT + * mss adjust if needed + * ip & tcp checksum update (incremental) + */ + +inline void tcp_in2out_nat_mss_n_checksum (ipv4_header * ip, + tcp_hdr_type * tcp, + u32 ipv4_addr, + u16 port, + cnat_main_db_entry_t * db) +{ + u8 *mss_ptr; + u8 check_mss = 0; + u16 mss_old, mss_new; + cnat_vrfmap_t * vrf_map_p; + + cnat_v4_recalculate_tcp_checksum(ip, + tcp, + &(ip->src_addr), + &(tcp->src_port), + ipv4_addr, + port); + u16 frag_offset = + clib_net_to_host_u16(ip->frag_flags_offset); + + if(PREDICT_FALSE(frag_offset & IP_FRAG_OFFSET_MASK)) { + return; /* No TCP Header at all */ + } + + /* + * check SYN bit and if options field is present + * If yes, proceed to extract the options and get TCP MSS value + */ + check_mss = ((tcp->flags & TCP_FLAG_SYN) && + (((tcp->hdr_len>>4) << 2) > sizeof(tcp_hdr_type))); + + if (PREDICT_FALSE(check_mss)) { + + /* get 
per VRF mss config */ + if(PREDICT_FALSE(db->flags & (CNAT_DB_DSLITE_FLAG))) { + mss_new = dslite_table_db_ptr[db->dslite_nat44_inst_id].tcp_mss; + } else { + vrf_map_p = cnat_map_by_vrf + db->vrfmap_index; + mss_new = vrf_map_p->tcp_mss; + } + DSLITE_PRINTF(1, "Check MSS true..%u\n", mss_new); + /* + * If TCP MSS is not configured, skip the MSS checks + */ + if (PREDICT_FALSE(mss_new != V4_TCP_MSS_NOT_CONFIGURED_VALUE)) { + + /* if mss_ptr != NULL, then it points to MSS option */ + mss_ptr = tcp_findoption(tcp, TCP_OPTION_MSS); + + /* + * TCP option field: | kind 1B | len 1B | value 2B| + * where kind != [0,1] + */ + if (PREDICT_TRUE(mss_ptr && (mss_ptr[1] == 4))) { + + u16 *ptr = (u16*)(mss_ptr + 2); + + mss_old = clib_net_to_host_u16(*ptr); + + if (PREDICT_FALSE(mss_old > mss_new)) { + u32 sum32; + u16 mss_old_r, old_tcp_checksum_r; + + *ptr = clib_host_to_net_u16(mss_new); + + mss_old_r = ~mss_old; + + old_tcp_checksum_r = + ~clib_net_to_host_u16(tcp->tcp_checksum); + + /* + * Revise the TCP checksum + */ + sum32 = old_tcp_checksum_r + mss_old_r + mss_new; + FILL_CHECKSUM(tcp->tcp_checksum, sum32) + + if (PREDICT_FALSE(tcp_logging_enable_flag)) { + tcp_debug_logging( + clib_net_to_host_u32(tcp->seq_num), + clib_net_to_host_u32(tcp->ack_num), + 0, + 0, + mss_old, + mss_new, + 0, + 0, + ~old_tcp_checksum_r, + clib_net_to_host_u16(tcp->tcp_checksum)); + } + } + } + } + } +} + +u32 get_my_svi_intf_ip_addr() { + return 0x01010101; +} diff --git a/vnet/vnet/vcgn/cnat_v4_functions.h b/vnet/vnet/vcgn/cnat_v4_functions.h new file mode 100644 index 00000000000..047fe33cc4c --- /dev/null +++ b/vnet/vnet/vcgn/cnat_v4_functions.h @@ -0,0 +1,342 @@ +/* + *------------------------------------------------------------------ + * cnat_v4_functions.h + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __CNAT_V4_FUNCTOINS__ +#define __CNAT_V4_FUNCTOINS__ + +#include "tcp_header_definitions.h" +#include "cnat_db.h" +#include "spp_ctx.h" + +#include "platform_common.h" + +/* + * Defines and structures to enable TCP packet logging + */ +#define TCP_LOGGING_DISABLE 0 +#define TCP_LOGGING_ENABLE 1 +#define TCP_LOGGING_PACKET_DUMP 2 +#define TCP_LOGGING_SUMMARY_DUMP 3 + +#define MAX_TCP_LOGGING_COUNT 1024 + +typedef struct tcp_logging_struct { + u32 seq_num; + u32 ack_num; + u32 old_ip; + u32 new_ip; + u16 old_port; + u16 new_port; + u16 old_ip_crc; + u16 new_ip_crc; + u16 old_tcp_crc; + u16 new_tcp_crc; +} tcp_logging_struct_t; + +void tcp_debug_logging_dump (void); +void tcp_debug_logging_enable_disable (u32 enable_flag); + +void +tcp_debug_logging ( + u32 seq_num, + u32 ack_num, + u32 old_ip, + u32 new_ip, + u16 old_port, + u16 new_port, + u16 old_ip_crc, + u16 new_ip_crc, + u16 old_tcp_crc, + u16 new_tcp_crc); + +#define JLI printf("%s %s %d\n", __FILE__, __FUNCTION__, __LINE__); fflush(stdout); + +#define CNAT_ICMP_DEST_UNREACHABLE 100 +#define INCREMENT_NODE_COUNTER(c) \ + em->counters[node_counter_base_index + c] += 1; + +#define V4_TCP_UPDATE_SESSION_FLAG(db, tcp) \ +if ((tcp->flags & TCP_FLAG_ACK) && (tcp->flags & TCP_FLAG_SYN)) { \ + db->flags |= CNAT_DB_FLAG_TCP_ACTIVE; \ +} \ +if ((tcp->flags & TCP_FLAG_RST) || (tcp->flags & TCP_FLAG_FIN)) { \ + db->flags &= ~CNAT_DB_FLAG_TCP_ACTIVE; \ + db->flags |= CNAT_DB_FLAG_TCP_CLOSING; \ +} + +#define V4_TCP_UPDATE_SESSION_DB_FLAG(sdb, tcp) \ +if ((tcp->flags & TCP_FLAG_ACK) && (tcp->flags & TCP_FLAG_SYN)) { \ + sdb->flags |= CNAT_DB_FLAG_TCP_ACTIVE; \ +} \ +if ((tcp->flags & TCP_FLAG_RST) || (tcp->flags & TCP_FLAG_FIN)) { \ + sdb->flags &= ~CNAT_DB_FLAG_TCP_ACTIVE; \ + sdb->flags |= CNAT_DB_FLAG_TCP_CLOSING; \ +} + +/* + * Code to recalculate checksum after ACK/SEQ number changes + * This macro assumes, we have pointer to tcp structure + * referenced by the name "tcp" + */ +#define CNAT_UPDATE_TCP_SEQ_ACK_CHECKSUM(old_val32, new_val32) \ +{ \ + u16 old_val_lower, old_val_upper, old_tcp_cr; \ + u16 new_val_lower, new_val_upper, new_tcp_cr; \ + u32 sum32; \ + \ + old_val_lower = ~((u16) old_val32); \ + old_val_upper = ~((u16) (old_val32 >> 16)); \ + old_tcp_cr = ~net2host16(&tcp->tcp_checksum); \ + new_val_lower = (u16) new_val32; \ + new_val_upper = (u16) (new_val32 >> 16); \ + \ + sum32 = old_val_lower + old_val_upper + old_tcp_cr + \ + new_val_lower + new_val_upper; \ + \ + sum32 = (sum32 & 0xffff) + ((sum32 >> 16) & 0xffff); \ + sum32 = (sum32 & 0xffff) + ((sum32 >> 16) & 0xffff); \ + new_tcp_cr = ~((u16)sum32); \ + \ + tcp->tcp_checksum = host2net16(new_tcp_cr); \ +} + +/* + * newchecksum = ~(~oldchecksum + ~old + new) + * old/new for l3 checksum: ip address + */ +#define CNAT_UPDATE_L3_CHECKSUM_DECLARE \ +u16 old_l3_1r, old_l3_2r; \ +u16 old_l3_cr, new_l3_c; \ +u32 new32; + +#define CNAT_UPDATE_L3_CHECKSUM(old_l3_1, old_l3_2, old_l3_c, \ + new_l3_1, new_l3_2) \ +old_l3_1r = ~(old_l3_1); \ +old_l3_2r = ~(old_l3_2); \ +old_l3_cr = ~(old_l3_c); \ +new32 = old_l3_cr + old_l3_1r + old_l3_2r + new_l3_1 + new_l3_2; \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new_l3_c = ~((u16)new32); + + +/* + * newchecksum = ~(~oldchecksum + ~old + new) + * old/new for l3 checksum: ip address + * old/new for l4 checksum: ip address and port + */ +#define CNAT_UPDATE_L3_L4_CHECKSUM_DECLARE \ +u16 
old_l3_1r, old_l3_2r, old_l4r; \ +u16 old_l3_cr, old_l4_cr; \ +u16 new_l3_c, new_l4_c; \ +u32 sum32, new32; + +#define CNAT_UPDATE_L3_L4_CHECKSUM(old_l3_1, old_l3_2, old_l4, \ + old_l3_c, old_l4_c, \ + new_l3_1, new_l3_2, new_l4) \ +old_l3_1r = ~(old_l3_1); \ +old_l3_2r = ~(old_l3_2); \ +old_l3_cr = ~(old_l3_c); \ +sum32 = old_l3_1r + old_l3_2r + new_l3_1 + new_l3_2; \ +new32 = old_l3_cr + sum32; \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new_l3_c = ~((u16)new32); \ +old_l4r = ~(old_l4); \ +old_l4_cr = ~(old_l4_c); \ +sum32 += old_l4r + new_l4; \ +new32 = old_l4_cr + sum32; \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new_l4_c = ~((u16)new32); + +/* + * For ICMP checksums, we don't use the top IP header for checksum calculation + */ +#define CNAT_UPDATE_L3_ICMP_CHECKSUM(old_l3_1, old_l3_2, old_l4, \ + old_l3_c, old_l4_c, \ + new_l3_1, new_l3_2, new_l4) \ +old_l3_1r = ~(old_l3_1); \ +old_l3_2r = ~(old_l3_2); \ +old_l3_cr = ~(old_l3_c); \ +sum32 = old_l3_1r + old_l3_2r + new_l3_1 + new_l3_2; \ +new32 = old_l3_cr + sum32; \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new_l3_c = ~((u16)new32); \ +old_l4r = ~(old_l4); \ +old_l4_cr = ~(old_l4_c); \ +sum32 = old_l4r + new_l4; \ +new32 = old_l4_cr + sum32; \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new_l4_c = ~((u16)new32); + + +/* + * icmp error type message: + * newchecksum = ~(~oldchecksum + ~old + new) + * old/new for outlayer ip checksum: ip address + * old/new for outlayer icmp checksum: + * out-layer: ip address + * inner-layer: ip addr, port, l3 checksum, l4 checksum + */ +#define CNAT_UPDATE_ICMP_ERR_CHECKSUM_DECLARE \ +u16 old_ip_1r, old_ip_2r, old_ip_port_r, old_ip_cr, old_icmp_cr; \ +u16 new_icmp_c; \ +u32 sum32; + + +#define CNAT_UPDATE_ICMP_ERR_CHECKSUM(old_ip_1, old_ip_2, old_ip_port, old_ip_c, old_icmp_c, \ + new_ip_1, new_ip_2, new_ip_port, new_ip_c) \ +old_ip_1r = ~(old_ip_1); \ +old_ip_2r = ~(old_ip_2); \ +old_ip_port_r = ~(old_ip_port); \ +old_ip_cr = ~(old_ip_c); \ +old_icmp_cr = ~(old_icmp_c); \ +sum32 = old_ip_1r + old_ip_2r + new_ip_1 + new_ip_2 + \ + old_ip_port_r + new_ip_port + old_ip_cr + new_ip_c; \ +new32 = old_icmp_cr + sum32; \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new32 = (new32 & 0xffff) + ((new32 >> 16) & 0xffff); \ +new_icmp_c = ~((u16)new32); \ + +/* + * Add the two 16 bit parts of the 32 bit field + * Repeat it one more time to take care of any overflow + * Complement the u16 value and store it in network format + */ +#define FILL_CHECKSUM(checksum_field, sum32) { \ + sum32 = (sum32 & 0xffff) + ((sum32>>16) & 0xffff); \ + sum32 = (sum32 & 0xffff) + ((sum32>>16) & 0xffff); \ + checksum_field = clib_host_to_net_u16(~((u16) sum32)); \ +} + +static inline void +cnat_v4_recalculate_tcp_checksum (ipv4_header *ip, + tcp_hdr_type *tcp, + u32 *ip_addr_ptr, + u16 *tcp_port_addr_ptr, + u32 new_ip, + u16 new_port) +{ + u32 old_ip_addr, old_ip32_r, new_ip32, sum32; + u16 old_port_r, old_ip_checksum_r, old_tcp_checksum_r; + + u16 *p16; + + p16 = (u16*) ip_addr_ptr; + + old_ip_addr = *ip_addr_ptr; + old_ip32_r = (((u16) ~clib_net_to_host_u16(*p16)) + + ((u16) ~clib_net_to_host_u16(*(p16+1)))); + + old_port_r = ~clib_net_to_host_u16(*tcp_port_addr_ptr); + + *ip_addr_ptr = clib_host_to_net_u32(new_ip); + + new_ip32 = (new_ip & 
0xffff) + ((new_ip >> 16) & 0xffff); + + old_ip_checksum_r = ~clib_net_to_host_u16(ip->checksum); + + /* + * Recalculate the new IP checksum + */ + sum32 = old_ip32_r + new_ip32 + old_ip_checksum_r; + + FILL_CHECKSUM(ip->checksum, sum32); + + u16 frag_offset = + clib_net_to_host_u16((ip->frag_flags_offset)); + + if(PREDICT_FALSE(frag_offset & IP_FRAG_OFFSET_MASK)) { + return; /* No need to update TCP fields */ + } + + *tcp_port_addr_ptr = clib_host_to_net_u16(new_port); + old_tcp_checksum_r = ~clib_net_to_host_u16(tcp->tcp_checksum); + + /* + * Recalculate the new TCP checksum + */ + sum32 = old_ip32_r + new_ip32 + + old_port_r + new_port + old_tcp_checksum_r; + + FILL_CHECKSUM(tcp->tcp_checksum, sum32); + + if (PREDICT_FALSE(tcp_logging_enable_flag)) { + tcp_debug_logging( + clib_net_to_host_u32(tcp->seq_num), + clib_net_to_host_u32(tcp->ack_num), + clib_net_to_host_u32(old_ip_addr), + clib_net_to_host_u32(*ip_addr_ptr), + ~old_port_r, + clib_net_to_host_u16(*tcp_port_addr_ptr), + ~old_ip_checksum_r, + clib_net_to_host_u16(ip->checksum), + ~old_tcp_checksum_r, + clib_net_to_host_u16(tcp->tcp_checksum)); + } +} + + +inline void tcp_in2out_nat_mss_n_checksum (ipv4_header *ip, + tcp_hdr_type *tcp, + u32 ipv4_addr, + u16 port, + cnat_main_db_entry_t * db); + +void hex_dump(u8 * p, int len); + +u32 get_my_svi_intf_ip_addr(); + +/* + * in cnat_v4_icmp_gen.c, + * return 1 if icmp msg allow to generate + * for this user + */ + +u32 icmp_msg_gen_allowed (); + +cnat_icmp_msg_t v6_icmp_msg_gen_allowed(); + +int v4_crc_zero_udp_allowed(); +void ipv4_decr_ttl_n_calc_csum(ipv4_header *ipv4); +int icmpv4_generate_with_throttling (spp_ctx_t *ctx, ipv4_header *ipv4, + u16 rx_uidb_index); + +int icmpv6_generate_with_throttling (spp_ctx_t *ctx, ipv6_header_t *ipv4, + u16 rx_uidb_index); + +void icmp_error_generate_v6(spp_ctx_t *ctx, u8 icmp_type, + u8 icmp_code, u16 uidb_index); + +void calculate_window_scale(tcp_hdr_type *tcp_header, u8 *scale); + +void cnat_log_nat44_tcp_seq_mismatch( + cnat_main_db_entry_t *db, + cnat_vrfmap_t *vrfmap); +void print_icmp_pkt (ipv4_header *ip); +void print_udp_pkt (ipv4_header *ip); +void print_tcp_pkt (ipv4_header *ip); +void print_ipv6_pkt (ipv6_header_t *ip); + + +#endif + diff --git a/vnet/vnet/vcgn/cnat_v4_pptp_alg.h b/vnet/vnet/vcgn/cnat_v4_pptp_alg.h new file mode 100644 index 00000000000..5a6d4243165 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_v4_pptp_alg.h @@ -0,0 +1,150 @@ +/* + *------------------------------------------------------------------ + * cnat_v4_pptp_alg.h + * + * Copyright (c) 2009-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __CNAT_V4_PPTP_ALG_H__ +#define __CNAT_V4_PPTP_ALG_H__ + +/* Debug utils of PPTP */ +#define PPTP_DBG(debug, ...) 
\ + if(PREDICT_FALSE(cnat_pptp_debug_flag >= debug)) { \ + PLATFORM_DEBUG_PRINT("%s:%s:%d - ", \ + __FILE__, __FUNCTION__, __LINE__);\ + PLATFORM_DEBUG_PRINT(__VA_ARGS__);\ + PLATFORM_DEBUG_PRINT("\n"); \ + } + +#define PPTP_DUMP_PACKET(ip, len) pptp_hex_dump(ip, len) + + +#define PPTP_DISABLED 0 +#define PPTP_ENABLED 1 + +#define PPTP_GRE_TIMEOUT 60 /*sec */ + +#define TCP_PPTP_PORT 1723 + +#define PPTP_PAC 0 +#define PPTP_PNS 1 + +/* PPTP MSG TYPE */ + +#define PPTP_MSG_TYPE_CONTROL 1 +#define PPTP_MSG_TYPE_MGMT 2 + +/* PPTP control messages */ + +/* control connection mgmt */ +#define PPTP_START_CC_RQ 1 +#define PPTP_START_CC_RP 2 +#define PPTP_STOP_CC_RQ 3 +#define PPTP_STOP_CC_RP 4 +#define PPTP_ECHO_RQ 5 +#define PPTP_ECHO_RP 6 + +/* call mgmt */ +#define PPTP_OBOUND_CALL_RQ 7 +#define PPTP_OBOUND_CALL_RP 8 +#define PPTP_IBOUND_CALL_RQ 9 +#define PPTP_IBOUND_CALL_RP 10 +#define PPTP_IBOUND_CALL_CN 11 +#define PPTP_CALL_CLEAR_RQ 12 +#define PPTP_CALL_DISCON_NT 13 + +/* other */ + +#define PPTP_WAN_ERR_NT 14 +#define PPTP_SET_LINK_INF 15 + +#define PPTP_MIN_HDR_LEN 8 + +/* Byte offsets from start of TCP Data(PPTP header) */ + +#define PPTP_CTRL_MGMT_TYPE_OFFSET 0x02 +#define PPTP_CC_TYPE_OFFSET 0x08 +#define PPTP_HDR_CALL_ID_OFFSET 0x0c +#define PPTP_HDR_PEER_CALL_ID_OFFSET 0x0e + +#define PPTP_HDR_RESULT_CODE_OFFSET_STCCRP 0x0e +#define PPTP_HDR_RESULT_CODE_OFFSET 0x10 + + +/* Offset of control/mgmt msg types + from start of TCP header */ + +#define TCP_HEADER_SIZE(tcp) \ + ((tcp->hdr_len>>4) << 2) + + +#define PPTP_MSG_START_OFFSET(tcp) \ + ((u8*)tcp + TCP_HEADER_SIZE(tcp)) + + +#define PPTP_CC_MSG_TYPE_OFFSET(tcp) \ + (PPTP_MSG_START_OFFSET(tcp) + \ + PPTP_CC_TYPE_OFFSET ) + +#define PPTP_MGMT_MSG_TYPE_OFFSET(tcp) \ + ( PPTP_MSG_START_OFFSET(tcp) + \ + PPTP_CTRL_MGMT_TYPE_OFFSET ) + +#define PPTP_CALL_ID_OFFSET(tcp) \ + ( PPTP_MSG_START_OFFSET(tcp) + \ + PPTP_HDR_CALL_ID_OFFSET ) + +#define PPTP_PEER_CALL_ID_OFFSET(tcp) \ + ( PPTP_MSG_START_OFFSET(tcp) + \ + PPTP_HDR_PEER_CALL_ID_OFFSET ) + +#define PPTP_RESULT_CODE_OFFSET(tcp) \ + ( PPTP_MSG_START_OFFSET(tcp) + \ + PPTP_HDR_RESULT_CODE_OFFSET ) + +#define PPTP_RESULT_CODE_OFFSET_STCCRP(tcp) \ + ( PPTP_MSG_START_OFFSET(tcp) + \ + PPTP_HDR_RESULT_CODE_OFFSET_STCCRP) + +/* values */ +#define PPTP_CC_MSG_TYPE(tcp) \ + (u16*)PPTP_CC_MSG_TYPE_OFFSET(tcp) + +#define PPTP_MGMT_MSG_TYPE(tcp) \ + (u16*)PPTP_MGMT_MSG_TYPE_OFFSET(tcp) + +#define PPTP_CALL_ID(tcp) \ + (u16*)PPTP_CALL_ID_OFFSET(tcp) + +#define PPTP_PEER_CALL_ID(tcp) \ + (u16*)PPTP_PEER_CALL_ID_OFFSET(tcp) + +#define PPTP_RESULT_CODE(tcp) \ + *(u8*)PPTP_RESULT_CODE_OFFSET(tcp); + +#define PPTP_RESULT_CODE_STCCRP(tcp) \ + *(u8*)PPTP_RESULT_CODE_OFFSET_STCCRP(tcp); + + +/* other code */ +#define PPTP_CHAN_SUCCESS 1 + + +/* Data structures */ + +extern u32 cnat_pptp_debug_flag; + +#endif /* __CNAT_V4_PPTP_ALG_H__ */ diff --git a/vnet/vnet/vcgn/cnat_v4_tcp_in2out_stages.c b/vnet/vnet/vcgn/cnat_v4_tcp_in2out_stages.c new file mode 100644 index 00000000000..220ced461aa --- /dev/null +++ b/vnet/vnet/vcgn/cnat_v4_tcp_in2out_stages.c @@ -0,0 +1,679 @@ +/* + *--------------------------------------------------------------------------- + * cnat_v4_tcp_in2out_stages.c - cnat_v4_tcp_in2out node pipeline stage functions + * + * + * Copyright (c) 2008-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/buffer.h> + +#include "cnat_db.h" +/* #include <cnat_feature_data.h> */ +#include "ipv4_packet.h" +#include "tcp_header_definitions.h" +#include "cnat_config.h" +#include "cnat_global.h" +#include "cnat_v4_functions.h" +#include "cnat_v4_ftp_alg.h" +#include "cnat_v4_pptp_alg.h" + +#define foreach_cnat_ipv4_tcp_inside_input_error \ +_(TCP_NAT_IN, "packets received") \ +_(TCP_NAT, "packets NATed") \ +_(TCP_EXCEPTION, "packets to exception") \ +_(TCP_TTL_GEN, "Generated TTL Expiry ICMP packet") \ +_(TCP_TTL_DROP, "Could not generate TTL Expiry ICMP packet") \ +_(TCP_SESSION_DROP, "Could not generate session") \ +_(TCP_FRAG_DROP, "Non-first Fragment received") + +typedef enum { +#define _(sym,str) sym, + foreach_cnat_ipv4_tcp_inside_input_error +#undef _ + CNAT_IPV4_TCP_INSIDE_INPUT_N_ERROR, +} cnat_ipv4_tcp_inside_input_t; + +static char * cnat_ipv4_tcp_inside_input_error_strings[] = { +#define _(sym,string) string, + foreach_cnat_ipv4_tcp_inside_input_error +#undef _ + + + +typedef struct cnat_v4_tcp_in2out_pipeline_data_ { + spp_node_main_vector_t *nmv; + /* Add additional pipeline stage data here... */ + u32 bucket; + u16 src_port; /* Added for handling fragments */ + u16 dst_port; /* Added for handling fragments */ +} cnat_v4_tcp_in2out_pipeline_data_t; + +static cnat_v4_tcp_in2out_pipeline_data_t pctx_data[SPP_MAXDISPATCH]; + +#define EXTRA_PIPELINE_ARGS_PROTO , cnat_v4_tcp_in2out_pipeline_data_t *pctx +#define EXTRA_PIPELINE_ARGS , pctx + +ALWAYS_INLINE( +static inline void +stage0(spp_ctx_t **ctxs, int index, spp_node_t *np, + u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO)) +{ + spp_ctx_t *ctx = ctxs[index]; + /* + * Prefetch the context header. 
This is almost always + * the right thing to do + */ + SPP_PREFETCH_CTX(ctx); +} + +ALWAYS_INLINE( +static inline void +stage1(spp_ctx_t **ctxs, int index, spp_node_t *np, + u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO)) +{ + spp_ctx_t *ctx = ctxs[index]; + /* got ctx, prefetch packet data separately */ + SPP_PREFETCH_CTX_DATA(ctx, 1*CACHE_DATA_QUANTUM); +} + +ALWAYS_INLINE( +static inline void +stage2(spp_ctx_t **ctxs, int index, spp_node_t *np, + u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO)) +{ + spp_ctx_t *ctx = ctxs[index]; + u64 a, b, c; + u32 bucket; + cnat_feature_data_t *fd = (cnat_feature_data_t *)ctx->feature_data; + ipv4_header *ip; + tcp_hdr_type * tcp; + u8 *prefetch_target; + + INCREMENT_NODE_COUNTER(np, TCP_NAT_IN); + + /* extract the key from ctx and save it to feature_data */ + + ip = (ipv4_header *)(ctx->current_header); + ctx->application_start = (ip->version_hdr_len_words & 0xf) << 2; + tcp = (tcp_hdr_type*) ((u8 *)ip + ctx->application_start); + + PLATFORM_CNAT_SET_RX_VRF(ctx,fd->dbl.k.k.vrf, CNAT_TCP, 1); + fd->dbl.k.k.ipv4 = spp_net_to_host_byte_order_32(&ip->src_addr); + + if(PREDICT_FALSE(ctx->ru.rx.frag)) { + /* Must have routed through cnat_v4_frag_in2out node + * Since feature data of the ctx is being used for other + * purposes here, copy them to extra stage argument + */ + u16 *feature_data_ports = (u16 *)&ctx->feature_data[2]; + pctx[index].src_port = fd->dbl.k.k.port = *feature_data_ports; + feature_data_ports++; + pctx[index].dst_port = *feature_data_ports; + } else { + fd->dbl.k.k.port = spp_net_to_host_byte_order_16(&tcp->src_port); + pctx[index].dst_port = + spp_net_to_host_byte_order_16(&tcp->dest_port); + } + +#if 0 + /* extra info for evil mode, or default value for dst_ipv4 field in good mode */ + fd->dbl.dst_ipv4 = address_dependent_filtering ? + spp_net_to_host_byte_order_32(&ip->dest_addr) : 0; +#endif + + CNAT_V4_GET_HASH(fd->dbl.k.key64, + bucket, CNAT_MAIN_HASH_MASK) + + prefetch_target = (u8 *)(&cnat_in2out_hash[bucket]); + pctx[index].bucket = bucket; + + /* Prefetch the hash bucket */ + SPP_PREFETCH(prefetch_target, 0, LOAD); + +} + +ALWAYS_INLINE( +static inline void +stage3(spp_ctx_t **ctxs, int index, spp_node_t *np, + u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO)) +{ + u32 db_index; + u32 bucket; + uword prefetch_target0, prefetch_target1; + + bucket = pctx[index].bucket; + + /* read the hash bucket */ + db_index = pctx[index].bucket = cnat_in2out_hash[bucket].next; + if (PREDICT_TRUE(db_index != EMPTY)) { + + /* + * Prefetch database keys. We save space by not cache-line + * aligning the DB entries. We don't want to waste LSU + * bandwidth prefetching stuff we won't need. 
+ */ + + prefetch_target0 = (uword)(cnat_main_db + db_index); + + SPP_PREFETCH(prefetch_target0, 0, LOAD); + + /* Just beyond DB key #2 */ + + prefetch_target1 = prefetch_target0 + + STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports); + + /* If the targets are in different lines, do the second prefetch */ + + if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) != + (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) { + + SPP_PREFETCH(prefetch_target1, 0, LOAD); + + } + } +} + +static inline void +stage4(spp_ctx_t **ctxs, int index, spp_node_t *np, + u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO) +{ + spp_ctx_t *ctx = ctxs[index]; + u32 db_index = pctx[index].bucket; + cnat_main_db_entry_t *db; + cnat_feature_data_t *fd; + + /* + * Note: if the search already failed (empty bucket), + * the answer is already in the pipeline context structure + */ + if (PREDICT_FALSE(db_index == EMPTY)) { + return; + } + + fd = (cnat_feature_data_t *)ctx->feature_data; + + /* + * Note: hash collisions suck. We can't easily prefetch around them. + * The first trip around the track will be fast. After that, maybe + * not so much... + */ + do { + + db = cnat_main_db + db_index; + if (PREDICT_TRUE(db->in2out_key.key64 == fd->dbl.k.key64)) + break; + db_index = db->in2out_hash.next; + + } while (db_index != EMPTY); + + /* even in evil mode, for in2out, we nat all packets regardless mode and dst_ip */ + + /* Stick the answer back into the pipeline context structure */ + pctx[index].bucket = db_index; +} + +ALWAYS_INLINE( +static inline void +stage5(spp_ctx_t **ctxs, int index, spp_node_t *np, + u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO)) +{ + spp_ctx_t *ctx = ctxs[index]; + u32 db_index = pctx[index].bucket; + cnat_feature_data_t *fd = (cnat_feature_data_t *)ctx->feature_data; + int disposition; + cnat_main_db_entry_t *db; + /* Below two pointers are just to keep the cnat_ftp_alg call happy*/ + dslite_table_entry_t *dslite_entry_ptr = NULL; + ipv6_header_t *ipv6_hdr = NULL; + tcp_hdr_type *tcp; + ipv4_header *ip; + i8 delta; + u32 seq, seq1; + u32 window; + u8 scale; + int rc; + + ip = (ipv4_header *) ctx->current_header; + + if (PLATFORM_HANDLE_TTL_DECREMENT) { + if (PREDICT_FALSE(ip->ttl <= 1)) { + /* Try to generate ICMP error msg, as TTL is <= 1 */ + + if (icmpv4_generate_with_throttling + (ctx, ip, ctx->ru.rx.uidb_index)) { + /* Generated ICMP */ + disposition = CNAT_REWRITE_OUTPUT; + INCREMENT_NODE_COUNTER(np, TCP_TTL_GEN); + } else { + /* Could not generated ICMP - drop the packet */ + disposition = CNAT_DROP; + INCREMENT_NODE_COUNTER(np, TCP_TTL_DROP); + } + goto drop_pkt; + } + } + + if (PREDICT_FALSE(db_index == EMPTY)) { + if(PREDICT_FALSE(ctx->ru.rx.frag)) { + /* Must have routed through cnat_v4_frag_in2out node */ + u16 frag_offset = + spp_net_to_host_byte_order_16(&(ip->frag_flags_offset)); + if(PREDICT_FALSE(frag_offset & IP_FRAG_OFFSET_MASK)) { + INCREMENT_NODE_COUNTER(np, TCP_FRAG_DROP); + disposition = CNAT_DROP; + goto drop_pkt; + } else { + INCREMENT_NODE_COUNTER(np, TCP_EXCEPTION); + disposition = CNAT_V4_TCP_IE; + } + } else { + INCREMENT_NODE_COUNTER(np, TCP_EXCEPTION); + disposition = CNAT_V4_TCP_IE; + } + } else { + cnat_key_t dest_info; + cnat_session_entry_t *session_db = NULL; + db = cnat_main_db + db_index; + /* Handle destination sessions */ + tcp = (tcp_hdr_type*) ((u8*)ip + ctx->application_start); + dest_info.k.port = pctx[index].dst_port; + dest_info.k.ipv4 = spp_net_to_host_byte_order_32(&(ip->dest_addr)); + + if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) { + + /* No DBL support, so 
just update the destn and proceed */ + db->dst_ipv4 = dest_info.k.ipv4; + db->dst_port = dest_info.k.port; + goto update_pkt; + } + + if(PREDICT_FALSE(db->dst_ipv4 != dest_info.k.ipv4 || + db->dst_port != dest_info.k.port)) { + if(PREDICT_TRUE(db->nsessions == 0)) { + /* Should be a static entry + * Note this session as the first session and log + */ + cnat_add_dest_n_log(db, &dest_info); + } else if(PREDICT_FALSE(db->nsessions == 1)) { + /* Destn is not same as in main db. Multiple session + * scenario + */ + dest_info.k.vrf = db->in2out_key.k.vrf; + session_db = cnat_handle_1to2_session(db, &dest_info); + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_DROP; + INCREMENT_NODE_COUNTER(np, TCP_SESSION_DROP); + goto drop_pkt; + } + } else { /* There are already multiple destinations */ + dest_info.k.vrf = db->in2out_key.k.vrf; + /* If session already exists, + * cnat_create_session_db_entry will return the existing db + * else create a new db + * If could not create, return NULL + */ + session_db = cnat_create_session_db_entry(&dest_info, + db, TRUE); + if(PREDICT_FALSE(session_db == NULL)) { + disposition = CNAT_DROP; + INCREMENT_NODE_COUNTER(np, TCP_SESSION_DROP); + goto drop_pkt; + } + } + if(PREDICT_TRUE(session_db)) { + /* Have to repeat the window size check for new destinations */ + window = (u32)spp_net_to_host_byte_order_16( + &tcp->window_size); + window = window << session_db->scale; + if(PREDICT_TRUE(!session_db->window)) { + calculate_window_scale(tcp, &scale); + session_db->scale = scale; + session_db->window = window; + } else if (PREDICT_FALSE(session_db->window < + window)) { + /* Update the db entry with window option from packet */ + session_db->window = window; + } else { + /* Do nothing */ + } + session_db->tcp_seq_num = spp_net_to_host_byte_order_32( + &tcp->seq_num); + session_db->ack_no = spp_net_to_host_byte_order_32( + &tcp->ack_num); + if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) { + PLATFORM_DEBUG_PRINT("\n In2out SDB stages seq no = %u," + " ack no = %u, window = %u\n", + session_db->tcp_seq_num, + session_db->ack_no, + session_db->window); + } + } + } else { + //Update the seq no and ack no for subsequent communication + //after connection establishment + //No need to update window here. Window is already updated + //during connection establishment + window = (u32)spp_net_to_host_byte_order_16( + &tcp->window_size); + window = window << db->scale; + if(PREDICT_FALSE(!ALG_ENABLED_DB(db))) { + //This check is done since proto_data is part of union in main + //db entry + db->proto_data.tcp_seq_chk.seq_no = + spp_net_to_host_byte_order_32( + &tcp->seq_num); + db->proto_data.tcp_seq_chk.ack_no = + spp_net_to_host_byte_order_32( + &tcp->ack_num); + } + if (PREDICT_FALSE(db->diff_window < window)) { + /* Update the db entry with window option from packet */ + db->diff_window = window; + } + if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) { + PLATFORM_DEBUG_PRINT("\n In2out MainDB seq no = %u," + "\n ack no = %u\n", + db->proto_data.tcp_seq_chk.seq_no, + db->proto_data.tcp_seq_chk.ack_no); + PLATFORM_DEBUG_PRINT("\n In2out MAINDB window = %u\n", + db->diff_window); + } + } +update_pkt: + + INCREMENT_NODE_COUNTER(np, TCP_NAT); + + disposition = CNAT_REWRITE_OUTPUT; + + /* NAT the packet and update checksum (increamental) */ + + /* If it is a non-first fragment, we need not worry about + * ALGs as the packet does not have TCP header.. 
+ * However, under a very race scenario when this non-first + * fragment is containing an FTP PORT command OR RTSP command + * we cannot handle that case.. in that case the ALG will fail + * Do not want to add a lot of complexity to handle one in million + * of such ALG case + */ + u16 frag_offset = + spp_net_to_host_byte_order_16(&(ip->frag_flags_offset)); + + if(PREDICT_FALSE(frag_offset & IP_FRAG_OFFSET_MASK)) { + /* Non first fragment.. no TCP header */ + FTP_ALG_DEBUG_PRINTF("Non first frag.. cannot handle ALG"); + goto handle_ttl_n_checksum; + } + + FTP_ALG_DEBUG_PRINTF("src port 0x%x, dst_port 0x%x", + spp_net_to_host_byte_order_16(&tcp->src_port), + spp_net_to_host_byte_order_16(&tcp->dest_port)) + + /* handle FTP ALG */ + if (PREDICT_FALSE(ftp_alg_enabled && + (spp_net_to_host_byte_order_16(&tcp->src_port) == 21 || + spp_net_to_host_byte_order_16(&tcp->dest_port) == 21))) { + + if(PREDICT_FALSE((db->flags & CNAT_DB_FLAG_PPTP_TUNNEL_ACTIVE) || + (db->flags & CNAT_DB_FLAG_PPTP_TUNNEL_INIT))) + { + /* FTP on a PPTP Control session? Ignore FTP */ + goto handle_ttl_n_checksum; + } + + if (PREDICT_FALSE(tcp->flags & (TCP_FLAG_SYN | TCP_FLAG_RST | + TCP_FLAG_FIN))) { + + FTP_ALG_DEBUG_PRINTF("SYN Case setting delta = 0") + + /* reset the delta */ + if(PREDICT_FALSE(session_db != NULL)) { + session_db->alg.delta = 0; + } else { + db->alg.delta = 0; + } + + } else { + + /* need to adjust seq # for in2out pkt if delta is not 0 */ + if (PREDICT_TRUE((session_db && (session_db->alg.delta != 0)) + || ((!session_db) && (db->alg.delta != 0)))) { + seq = net2host32(&tcp->seq_num); + + FTP_ALG_DEBUG_PRINTF("Orig Seq Num 0x%x", seq) + /* + * for ftp packets, due to PORT command translation, + * we may have cases that a packet/payload len gets + * changed for tcp, we need to adjust the packet's + * sequence numbers to match the changes. The delta + * of orig pkt len and new len is in alg_dlt[1] together + * with the sequence number that cuased the delta. When + * there are multiple len changes, we keep theprevious + * delta in alg_dlt[0] for case like pkt retransmission. + * So depends on packet seq number, we decide to use + * either latest delta or previous delta ([0]) + * We won't be here if both delta values are 0 + */ + if(PREDICT_FALSE(session_db != NULL)) { + seq1 = seq > session_db->tcp_seq_num ? + (seq + session_db->alg.alg_dlt[1]): + (seq + session_db->alg.alg_dlt[0]); + } else { + seq1 = seq > db->proto_data.seq_pcp.tcp_seq_num ? 
+ (seq + db->alg.alg_dlt[1]): + (seq + db->alg.alg_dlt[0]); + } + + FTP_ALG_DEBUG_PRINTF("Old_seq_num 0x%x New Seq Num 0x%x", + seq, seq1) + + if (PREDICT_TRUE(seq1 != seq)) { + + tcp->seq_num = host2net32(seq1); + + FTP_ALG_DEBUG_PRINTF("Old TCP Checksum 0x%x", + net2host16(&tcp->tcp_checksum)) + + /* + * fix checksum incremental for seq # changes + * newchecksum = ~(~oldchecksum + ~old + new) + */ + CNAT_UPDATE_TCP_SEQ_ACK_CHECKSUM(seq, seq1) + } /* There is a diff in seq */ + + } /* ALG Delta is non zero */ + + rc = cnat_ftp_alg((u8*) ip, &delta, db, dslite_entry_ptr, ipv6_hdr); + + FTP_ALG_DEBUG_PRINTF("cnat_ftp_alg rc 0x%x", rc) + + /*if located PORT cmd, packet being updated, take the delta and seq # */ + if (PREDICT_FALSE(rc)) { + + /* set alg flag for this ftp control connection */ + if(PREDICT_FALSE(session_db != NULL)) { + session_db->flags |= CNAT_DB_FLAG_ALG_CTRL_FLOW; + } else { + db->flags |= CNAT_DB_FLAG_ALG_CTRL_FLOW; + } + + /* + * rc != 0 indicates this packet has triggered a new pkt len delta + * we need to update db entry's seq# with seq# of this packet. + * + * Move alg_dlt[1] to [0], (current delta -> previous delta) + * then apply latest delta to alg_dlt[1] (keep [1] as latest delta) + */ + if(PREDICT_FALSE(session_db != NULL)) { + session_db->tcp_seq_num = net2host32(&tcp->seq_num); + session_db->alg.alg_dlt[0] = session_db->alg.alg_dlt[1]; + + /* accumulate the delta ! */ + session_db->alg.alg_dlt[1] += delta; + FTP_ALG_DEBUG_PRINTF( + "cnat_ftp_alg seq_num 0x%x, dlt0 0x%x, dlt1 0x%x", + session_db->tcp_seq_num, + session_db->alg.alg_dlt[0], + session_db->alg.alg_dlt[1]) + + } else { + db->proto_data.seq_pcp.tcp_seq_num = net2host32(&tcp->seq_num); + db->alg.alg_dlt[0] = db->alg.alg_dlt[1]; + + /* accumulate the delta ! */ + db->alg.alg_dlt[1] += delta; + + FTP_ALG_DEBUG_PRINTF( + "cnat_ftp_alg seq_num 0x%x, dlt0 0x%x, dlt1 0x%x", + db->proto_data.seq_pcp.tcp_seq_num, + db->alg.alg_dlt[0], + db->alg.alg_dlt[1]) + } + ctx->current_length += delta; + }/* cnat_ftp_alg returned non zero */ + } /* It is not a SYN, RST or FIN */ + } else if (PREDICT_FALSE(rtsp_alg_port_num && + ((spp_net_to_host_byte_order_16(&tcp->dest_port) == rtsp_alg_port_num) || + (spp_net_to_host_byte_order_16(&tcp->src_port) == rtsp_alg_port_num))) ) { + + if (PREDICT_FALSE(tcp->flags & (TCP_FLAG_SYN | TCP_FLAG_RST | + TCP_FLAG_FIN))) { + + FTP_ALG_DEBUG_PRINTF("SYN Case setting delta = 0") + + /* reset the delta */ + if(PREDICT_FALSE(session_db != NULL)) { + session_db->alg.delta = 0; + } else { + db->alg.delta = 0; + } + + } else { +#define RTSP_ALG_DELTA_MASK 0xFF + /* need to adjust seq # for in2out pkt if delta is not 0 */ + if (PREDICT_FALSE((session_db && + (session_db->alg.delta & RTSP_ALG_DELTA_MASK) != 0) || + ((!session_db) && + (db->alg.delta & RTSP_ALG_DELTA_MASK) != 0))) { + seq = net2host32(&tcp->seq_num); + + if(PREDICT_FALSE(session_db != NULL)) { + seq1 = seq > session_db->tcp_seq_num ? + (seq + db->alg.alg_dlt[1]): + (seq + db->alg.alg_dlt[0]); + } else { + seq1 = seq > db->proto_data.seq_pcp.tcp_seq_num ? 
+ (seq + db->alg.alg_dlt[1]): + (seq + db->alg.alg_dlt[0]); + } + + FTP_ALG_DEBUG_PRINTF("Old_seq_num 0x%x New Seq Num 0x%x", + seq, seq1) + + if (PREDICT_TRUE(seq1 != seq)) { + + tcp->seq_num = host2net32(seq1); + + FTP_ALG_DEBUG_PRINTF("Old TCP Checksum 0x%x", + net2host16(&tcp->tcp_checksum)) + + /* + * fix checksum incremental for seq # changes + * newchecksum = ~(~oldchecksum + ~old + new) + */ + CNAT_UPDATE_TCP_SEQ_ACK_CHECKSUM(seq, seq1) + } + + } + } + if ((session_db && (!session_db->alg.il)) || + ((!session_db) && (!db->alg.il))) { + cnat_rtsp_alg((u8*) ip, + &delta, + db, + ctx->current_length, + NULL, + NULL); + } + } +handle_ttl_n_checksum: + if (PLATFORM_HANDLE_TTL_DECREMENT) { + /* + * Decrement TTL and update IPv4 checksum + */ + ipv4_decr_ttl_n_calc_csum(ip); + } + + tcp_in2out_nat_mss_n_checksum(ip, + tcp, + db->out2in_key.k.ipv4, + db->out2in_key.k.port, + db); +/* CNAT_PPTP_ALG_SUPPORT */ + /* code to handle pptp control msgs */ + if(PREDICT_FALSE( + (spp_net_to_host_byte_order_16(&tcp->dest_port) == + TCP_PPTP_PORT))) { + + u32 ret; + + PPTP_DBG(3, "PPTP mgmt/ctrl msg recieved"); + + ret = cnat_handle_pptp_msg(ctx, db , tcp, PPTP_PNS ); + + if( PREDICT_FALSE( ret != CNAT_SUCCESS) ) { + PPTP_DBG(3, "PPTP mgmt/ctrl msg drop"); + disposition = CNAT_DROP; + PPTP_INCR(ctrl_msg_drops); + goto drop_pkt; + } + } + +/* CNAT_PPTP_ALG_SUPPORT */ + + /* update transaltion counters */ + db->in2out_pkts++; + + in2out_forwarding_count++; + + PLATFORM_CNAT_SET_TX_VRF(ctx,db->out2in_key.k.vrf); + + /* update the timer for good mode, or evil mode dst_ip match */ + +// if (!address_dependent_filtering || fd->dbl.dst_ipv4 == db->dst_ipv4) { + if(PREDICT_FALSE(session_db != NULL)) { + V4_TCP_UPDATE_SESSION_DB_FLAG(session_db, tcp); + CNAT_DB_TIMEOUT_RST(session_db); + } else { + V4_TCP_UPDATE_SESSION_FLAG(db, tcp); + CNAT_DB_TIMEOUT_RST(db); + } + +// } + + } + + /* Pick up the answer and put it into the context */ + fd->dbl.db_index = db_index; + +drop_pkt: + + DISP_PUSH_CTX(np, ctx, disposition, disp_used, last_disposition, last_contexts_ptr, last_nused_ptr); + +} + diff --git a/vnet/vnet/vcgn/cnat_va_db.c b/vnet/vnet/vcgn/cnat_va_db.c new file mode 100644 index 00000000000..7423bdf2de2 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_va_db.c @@ -0,0 +1,286 @@ +/* + *------------------------------------------------------------------ + * cnat_va_db.c - virtual assembly database + * + * Copyright (c) 2009, 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <cnat_va_db.h> +#include <format.h> +#include <spp_node.h> +#include <spp_alloc.h> +#include <spp_byteorder.h> +#include <spp_main.h> +#include <spp_cache.h> +#include <spp_interface.h> +#include <spp_api.h> +#include <spp_client_api.h> +#include <spp_timers.h> +#include <cnat_db.h> +#include <spp_plugin.h> +#include <cnat_v4_functions.h> + + +va_bucket_t va_bucket[VA_BUCKETS]; + +void va_bucket_init () { + + u32 i; + + /* + * set the pointer in each bucket + * points to nowhere + */ + for (i=0; i<VA_BUCKETS; i++) { + va_bucket[i].next_available_entry = ~0; + } + +} + +inline void va_db_add_new_entry (u32 bucket_index, + va_lookup_key * key ) +{ + + va_entry_t * entry_p; + u32 head, next; + + entry_p = va_db_lookup(bucket_index, key); + + if (PREDICT_FALSE(entry_p)) { + FRAG_DEBUG_PRINTF6( + "\nVA_ADD_NEW: Bucket %d fnd Existng entry [%d, %d] -> [%d, %d]\n", + bucket_index, entry_p->src_port, + entry_p->dst_port, key->e.src_port, key->e.dst_port) + + /* found match entry, update it */ + entry_p->src_port = key->e.src_port; + entry_p->dst_port = key->e.dst_port; + + FRAG_DEBUG_PRINTF3("VA_ADD_NEW: Existing bucket %d, counter %d\n", + bucket_index, + va_bucket[bucket_index].new_entry_counter) + + } else { + + /* no match, add a new one */ + head = va_bucket[bucket_index].head_entry; + next = va_bucket[bucket_index].next_available_entry; + + FRAG_DEBUG_PRINTF5( + "\nVA_ADD_NEW: Filling bucket %d, index %d with key 0x%llx %x\n", + bucket_index, next, key->k.key64, key->k.key32) + + va_bucket[bucket_index].va_entry[next] = key->e; + + /* increase next pointer */ + va_bucket[bucket_index].next_available_entry = (next+1) & VA_BUCKET_MASK; + + if (PREDICT_FALSE(head == va_bucket[bucket_index].next_available_entry)) { + /* adjust head circular pointer */ + va_bucket[bucket_index].head_entry = (head+1) & VA_BUCKET_MASK; + } + + va_bucket[bucket_index].new_entry_counter++; + + FRAG_DEBUG_PRINTF4( + "VA_ADD_NEW: NEW bucket %d, entry %d counter %d\n", + bucket_index, next, va_bucket[bucket_index].new_entry_counter) + } +} + + +/* + * use the key, + * return pointer to the entry if found, + * NULL if not + */ + +inline +va_entry_t * va_db_lookup (u32 bucket_index, va_lookup_key * key) +{ + + u32 index, next; + va_entry_t * entry_p; + va_bucket_t * bucket; + + bucket = &va_bucket[bucket_index]; + index = bucket->head_entry; + next = bucket->next_available_entry; + entry_p = NULL; + + FRAG_DEBUG_PRINTF4( + "\nVA_DB_LOOKUP: bucket index %d head %d next %d\n", + bucket_index, index, next) + + /* loop through the entries in the bucket */ + while( index != next) { + + if(PREDICT_TRUE(memcmp(&bucket->va_entry[index], key, VA_KEY_SIZE)==0)) { + + entry_p = &bucket->va_entry[index]; + /*In add frag entry function we are again assigning key's src + port to entry_p's src port. So when a main DB entry is deleted/ + timed out, and again another entry is created for the same + src ip and src port pair, the frag's entry_p will have the + previous port info stored and not updated. 
Hence the below + line is not required*/ + + /* *(u32*)&key->e.src_port = *(u32*)&entry_p->src_port; */ + /* do two ports as u32 :) */ + + break; + } + + index = (index +1) & VA_BUCKET_MASK; + + } + +#ifdef FRAG_DEBUG + if (PREDICT_TRUE(entry_p)) { + FRAG_DEBUG_PRINTF3("VA_DB_LOOKUP: bucket index %d entry index %d\n", + bucket_index, index) + FRAG_DEBUG_PRINTF5("VA_DB_LOOKUP: SRC-->DST [0x%x, %d] [0x%x, %d]\n", + entry_p->src_ip, entry_p->src_port, + entry_p->dst_ip, entry_p->dst_port) + FRAG_DEBUG_PRINTF3("[vrf 0x%x, id 0x%x]\n", + entry_p->vrf, entry_p->ip_id) + } else { + FRAG_DEBUG_PRINTF1("\nNULL ENTRY\n") + } +#endif + + return entry_p; + +} + +inline +int va_db_delete_entry (u32 bucket_index, va_lookup_key * key) +{ + + u32 index, next; + int entry_found = 0; + va_bucket_t * bucket; + + bucket = &va_bucket[bucket_index]; + index = bucket->head_entry; + next = bucket->next_available_entry; + + FRAG_DEBUG_PRINTF4( + "\nVA_DB_DELETE_ENTRY: bucket index %d head %d next %d\n", + bucket_index, index, next); + + /* loop through the entries in the bucket */ + while( index != next) { + if(PREDICT_TRUE(memcmp(&bucket->va_entry[index], key, + VA_KEY_SIZE)==0)) { + /* Clear the entry */ + FRAG_DEBUG_PRINTF1("Entry found in delete API"); + memset(&bucket->va_entry[index], 0, sizeof(va_entry_t)); + entry_found = 1; + break; + } + index = (index +1) & VA_BUCKET_MASK; + } + return entry_found; +} + + + +void cnat_va_bucket_used (int argc, unsigned long * argv) +{ + + u32 i, sum = 0;; + + for(i=0;i<VA_BUCKETS;i++) { + + if(PREDICT_TRUE(va_bucket[i].new_entry_counter)) sum++; + + } + + if (PREDICT_FALSE(!sum)) { + printf("no bucket in use\n"); + return; + } + + printf("index head next counter (%d bucket in use)\n", sum); + + for(i=0;i<VA_BUCKETS;i++) { + + if (PREDICT_FALSE(!va_bucket[i].new_entry_counter)) continue; + + printf(" %04d %04d %04d %d\n", i, + va_bucket[i].head_entry, + va_bucket[i].next_available_entry, + va_bucket[i].new_entry_counter); + + } +} + +void cnat_va_dump (int argc, unsigned long * argv) +{ + + u32 i, sum, index ; + + PLATFORM_DEBUG_PRINT("====== SUMMARY ======\n"); + PLATFORM_DEBUG_PRINT("Total buckets: %d\n", VA_BUCKETS); + PLATFORM_DEBUG_PRINT("Entries per bucket: %d\n", VA_ENTRY_PER_BUCKET); + + sum = 0; + + for(i=0; i<VA_BUCKETS; i++) { + if (PREDICT_TRUE(va_bucket[i].new_entry_counter > 0)) sum ++; + } + + PLATFORM_DEBUG_PRINT("buckets in use: %d\n", sum); + + sum = 0; + for(i=0; i<VA_BUCKETS; i++) { + + if ( PREDICT_FALSE(((va_bucket[i].next_available_entry+1) & VA_BUCKET_MASK) + == va_bucket[i].head_entry)) { + + sum ++; + } + } + + PLATFORM_DEBUG_PRINT("bucket full: %d\n", sum); + + /* dump per bucket info */ + + if (argc == 0 ) return; + + index = (u32) argv[0]; + + if (PREDICT_FALSE(index >= VA_BUCKETS)) { + PLATFORM_DEBUG_PRINT("invalid bucket index %d\n", index); + return; + } + + PLATFORM_DEBUG_PRINT("\n====== Bucket %d ======\n", index); + + PLATFORM_DEBUG_PRINT("bucket head index %d\n", va_bucket[index].head_entry); + + PLATFORM_DEBUG_PRINT("bucket next index %d\n", va_bucket[index].next_available_entry); + + PLATFORM_DEBUG_PRINT(" source IP dest IP VRF ip-id srcP dstP\n"); + + for(i=0;i<VA_ENTRY_PER_BUCKET;i++) { + hex_dump((u8*)&va_bucket[index].va_entry[i], sizeof(va_entry_t)); + } + +} diff --git a/vnet/vnet/vcgn/cnat_va_db.h b/vnet/vnet/vcgn/cnat_va_db.h new file mode 100644 index 00000000000..6e0051b46f7 --- /dev/null +++ b/vnet/vnet/vcgn/cnat_va_db.h @@ -0,0 +1,121 @@ +/* + *------------------------------------------------------------------ + * 
cnat_va_db.h - definition for virtual assembly database + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __CNAT_VA_DB_H__ +#define __CNAT_VA_DB_H__ + +#include <clib_lite.h> + +#define FRAG_DEBUG 1 + +/* virtual assemble hash database size ~ 16B x 64K = 1MB */ + +#define VA_TOTAL_ENTRIES (64*1024) +#define VA_ENTRY_PER_BUCKET (8) /* make sure size is power of 2 for circular FIFO */ +#define VA_BUCKET_MASK (VA_ENTRY_PER_BUCKET -1) +#define VA_BUCKETS (VA_TOTAL_ENTRIES / VA_ENTRY_PER_BUCKET) +#define VA_KEY_SIZE 12 + +typedef struct _va_entry { + /* key: top 12 bytes */ + u32 src_ip; + u32 dst_ip; + u16 vrf; /* overloaded with protocol info with top two bits */ + u16 ip_id; + + /* values */ + u16 src_port; + u16 dst_port; +} va_entry_t; + +typedef struct _va_keys { + u64 key64; /* src & dst IP */ + u32 key32; /* vrf, protocol and ip_id */ +} va_keys; + +typedef union { + va_entry_t e; + va_keys k; +} va_lookup_key; + +typedef struct _va_bucket_t { + u32 head_entry; + u32 next_available_entry; /* ~0 for empty bucket */ + u32 new_entry_counter; /* for debug purpose */ + va_entry_t va_entry[VA_ENTRY_PER_BUCKET]; +} va_bucket_t; + +extern va_bucket_t va_bucket[]; /* hash table in cnat_va_db.c */ + +void va_bucket_init (); + +inline void va_db_add_new_entry (u32 bucket_index, va_lookup_key * ); +inline int va_db_delete_entry (u32 bucket_index, va_lookup_key * ); +inline va_entry_t * va_db_lookup (u32 bucket_index, va_lookup_key * key); + +#ifdef FRAG_DEBUG + +#define FRAG_DEBUG_PRINTF1(a) \ + if (frag_debug_flag) { \ + PLATFORM_DEBUG_PRINT(a); \ + } + +#define FRAG_DEBUG_PRINTF2(a, b) \ + if (frag_debug_flag) { \ + PLATFORM_DEBUG_PRINT(a, b); \ + } + +#define FRAG_DEBUG_PRINTF3(a, b, c) \ + if (frag_debug_flag) { \ + PLATFORM_DEBUG_PRINT(a, b, c); \ + } + +#define FRAG_DEBUG_PRINTF4(a, b, c, d) \ + if (frag_debug_flag) { \ + PLATFORM_DEBUG_PRINT(a, b, c, d); \ + } + +#define FRAG_DEBUG_PRINTF5(a, b, c, d, e) \ + if (frag_debug_flag) { \ + PLATFORM_DEBUG_PRINT(a, b, c, d, e); \ + } + +#define FRAG_DEBUG_PRINTF6(a, b, c, d, e, f) \ + if (frag_debug_flag) { \ + PLATFORM_DEBUG_PRINT(a, b, c, d, e, f); \ + } +#else + +#define FRAG_DEBUG_PRINTF1(a) + +#define FRAG_DEBUG_PRINTF2(a, b) + +#define FRAG_DEBUG_PRINTF3(a, b, c) + +#define FRAG_DEBUG_PRINTF4(a, b, c, d) + +#define FRAG_DEBUG_PRINTF5(a, b, c, d, e) + +#define FRAG_DEBUG_PRINTF6(a, b, c, d, e, f) + +#endif + +#endif /* __CNAT_VA_DB_H__ */ + + diff --git a/vnet/vnet/vcgn/dslite_db.h b/vnet/vnet/vcgn/dslite_db.h new file mode 100644 index 00000000000..2269b98c989 --- /dev/null +++ b/vnet/vnet/vcgn/dslite_db.h @@ -0,0 +1,170 @@ +/* + *------------------------------------------------------------------ + * dslite_db.h - Stateful DSLITE translation database definitions + * + * Copyright (c) 2010-2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#ifndef __DSLITE_DB_H__ +#define __DSLITE_DB_H__ + +#include "cnat_cli.h" +#include "index_list.h" +#include "cnat_ports.h" +#include "cnat_db.h" +#include "dslite_defs.h" + +#define DSLITE_PRINTF(level, ...) \ + if (dslite_debug_level > level) PLATFORM_DEBUG_PRINT(__VA_ARGS__); +/* +#define DSLITE_PRINTF(lvl, ...) \ +{ \ + avsm_dispatlib_debug (__VA_ARGS__); \ +} +*/ + +#define HASH_ENHANCE 4 +//#define DSLITE_DEF +#define DSLITE_MAIN_DB_SIZE (20000000 / PLATFORM_CNAT_INSTS) +#define DSLITE_MAIN_HASH_SIZE \ + (HASH_ENHANCE * PLATFORM_CNAT_MAIN_PRELIM_HASH_SIZE) + +#define DSLITE_MAIN_HASH_MASK (DSLITE_MAIN_HASH_SIZE-1) + + +/* nb: 200000 users / 64 CNAT = 3125, 76% occupancy */ +#define DSLITE_USER_HASH_SIZE CNAT_USER_HASH_SIZE +#define DSLITE_USER_HASH_MASK (DSLITE_USER_HASH_SIZE-1) + +/* No. of per ip/port config will be limited to 1000 */ +#define DSLITE_TIMEOUT_HASH_SIZE 1000 +#define DSLITE_TIMEOUT_HASH_MASK (DSLITE_TIMEOUT_HASH_SIZE - 1) +#define DSLITE_TIMEOUT_FULL_MASK 0xFFFFFFFFFFFFFFFF + +#define CNAT_MAX_SESSIONS_PER_BIB 0xFFFF + +#define FORCE_DEL 1 /* Delete static BIB entries as well */ + +/* default timeout values */ +#define DSLITE_UDP_DEFAULT 300 /* 5 min */ +#define DSLITE_UDP_MIN 120 /* 2 min */ +#define DSLITE_TCP_TRANS 240 /* 4 min */ +#define DSLITE_TCP_EST 7200 /* 2 hrs */ +#define DSLITE_TCP_V4_SYN 6 /* 6 sec */ +#define DSLITE_FRAG_MIN 2 /* 2 sec */ +#define DSLITE_ICMP_DEFAULT 60 /* 1 min */ + +extern u32 dslite_translation_create_count; +extern u32 dslite_translation_delete_count; +extern u32 dslite_translation_create_rate; +extern u32 dslite_translation_delete_rate; +extern u32 dslite_in2out_forwarding_count; +extern u32 dslite_in2out_forwarding_rate; +extern u32 dslite_out2in_forwarding_count; +extern u32 dslite_out2in_forwarding_rate; + +#define DSLITE_V6_GET_HASH(in_key, hash, mask) \ + a = in_key->ipv6[0] ^ in_key->ipv6[1] ^ in_key->ipv6[2] ^ in_key->ipv6[3] \ + ^ in_key->ipv4_key.k.ipv4 ^ ((in_key->ipv4_key.k.port << 16) | in_key->ipv4_key.k.vrf); \ + DSLITE_PRINTF(1, "%x:%x:%x:%x:%x:%x:%x\n", in_key->ipv6[0], in_key->ipv6[1], in_key->ipv6[2], in_key->ipv6[3], \ + in_key->ipv4_key.k.ipv4, in_key->ipv4_key.k.port, in_key->ipv4_key.k.vrf); \ + b = c = 0x9e3779b9;\ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix32(a, b, c); \ + hash = c & mask; \ + + +#define DSLITE_V6_GET_USER_HASH(ipv6, hash, mask) \ + a = ipv6[0] ^ ipv6[1] ^ ipv6[2] ^ ipv6[3]; \ + b = c = 0x9e3779b9;\ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix32(a, b, c); \ + hash = c & mask; \ + +#define DSLITE_V4_GET_HASH(in_key, hash, mask) \ + a = in_key.ipv4 ^ ((in_key.port << 16) | in_key.vrf); \ + b = c = 0x9e3779b9; \ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix32(a, b, c); \ + hash = c & mask; + +#define PRIVATE_V4_ADDR_CHECK(addr, invalid) \ + invalid = 0; \ + int range1 = ((addr & 0xFF000000) >> 
24); \ + int range2 = ((addr & 0xFFF00000) >> 20); \ + int range3 = ((addr & 0xFFFF0000) >> 16); \ + int range4 = ((addr & 0xFFFFFFF8) >> 3); \ + if(range1 != 0xa && range2 != 0xac1 && range3 != 0xc0a8 && range4 != 0x18000000) \ + invalid = 1; + +#define V4_MAPPED_V6_CHECK(v6_addr, invalid) \ + invalid = 0; \ + int word1 = v6_addr[0]; \ + int word2 = v6_addr[1]; \ + int word3 = v6_addr[2]; \ + if(!((word1 == 0) && (word2 == 0) && (word3 == 0x0000FFFF))) \ + invalid = 1; + + +extern dslite_table_entry_t dslite_table_array[DSLITE_MAX_DSLITE_ENTRIES]; +extern dslite_table_entry_t *dslite_table_ptr; + +#define DSLITE_CMP_V6_KEY(key1, key2) \ + memcmp(key1, key2, sizeof(dslite_v6_key_t)) + +#define DSLITE_CMP_V4_KEY(key1, key2) \ + memcmp(key1, key2, sizeof(dslite_v4_key_t)) + + +#define DSLITE_CMP_V6_IP(ip1, ip2) \ + memcmp(ip1, ip2, (sizeof(u32) * 4)) + + +#define DSLITE_CMP_V6_KEY1(key1, key2) \ + (key1.ipv6[0] == key2.ipv6[0]) && (key1.ipv6[1] == key2.ipv6[1]) && \ + (key1.ipv6[2] == key2.ipv6[2]) && (key1.ipv6[3] == key2.ipv6[3]) && \ + (key1.port == key2.port) && (key1.vrf == key2.vrf) + + +#define DSLITE_CMP_V6_IP1(ip1, ip2) \ + ((ip1[0] == ip2[0]) && (ip1[1] == ip2[1]) && \ + (ip1[2] == ip2[2]) && (ip1[3] == ip2[3])) + +#define DSLITE_CMP_V4_KEY1(key1, key2) \ + (key1.key64 == key2.key64) + +cnat_main_db_entry_t* +dslite_get_main_db_entry_v2(dslite_db_key_bucket_t *ki, + port_pair_t port_pair_type, + port_type_t port_type, + cnat_gen_icmp_info *info, + dslite_table_entry_t *dslite_entry_ptr, + cnat_key_t *dest_info); + +cnat_main_db_entry_t* +dslite_main_db_lookup_entry(dslite_db_key_bucket_t *ki); + + +cnat_user_db_entry_t* +dslite_user_db_lookup_entry(dslite_db_key_bucket_t *uki); + +cnat_user_db_entry_t* +dslite_user_db_create_entry(dslite_db_key_bucket_t *uki, u32 portmap_index); + +cnat_main_db_entry_t* +dslite_create_main_db_entry_and_hash(dslite_db_key_bucket_t *ki, + cnat_db_key_bucket_t *ko, + cnat_user_db_entry_t *udb); + +#endif diff --git a/vnet/vnet/vcgn/dslite_defs.h b/vnet/vnet/vcgn/dslite_defs.h new file mode 100644 index 00000000000..4860adcb77d --- /dev/null +++ b/vnet/vnet/vcgn/dslite_defs.h @@ -0,0 +1,336 @@ +/* + *------------------------------------------------------------------ + * dslite_defs.h - DSLITE structure definiitions + * + * Copyright (c) 2011-2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __DSLITE_DEFS_H__ +#define __DSLITE_DEFS_H__ + +#ifdef TOBE_PORTED +#include "spp_platform_common.h" +#include "cgse_defs.h" +#endif +#include "cnat_cli.h" +#include "cnat_config.h" +#include "cnat_ports.h" +#include "cnat_bulk_port_defs.h" + +extern u32 ds_lite_config_debug_level; + +#define SWAP_IPV6_ADDR(ipv6_hdr, dslite_entry_ptr) \ + ipv6_hdr->dst_addr[0] = ipv6_hdr->src_addr[0]; \ + ipv6_hdr->dst_addr[1] = ipv6_hdr->src_addr[1]; \ + ipv6_hdr->dst_addr[2] = ipv6_hdr->src_addr[2]; \ + ipv6_hdr->dst_addr[3] = ipv6_hdr->src_addr[3]; \ + ipv6_hdr->src_addr[0] = spp_host_to_net_byte_order_32(dslite_entry_ptr->AFTR_v6_address[0]); \ + ipv6_hdr->src_addr[1] = spp_host_to_net_byte_order_32(dslite_entry_ptr->AFTR_v6_address[1]); \ + ipv6_hdr->src_addr[2] = spp_host_to_net_byte_order_32(dslite_entry_ptr->AFTR_v6_address[2]); \ + ipv6_hdr->src_addr[3] = spp_host_to_net_byte_order_32(dslite_entry_ptr->AFTR_v6_address[3]); + +#define DSLITE_SET_TX_PKT_TYPE(type) { \ + ctx->ru.tx.packet_type = type; \ +} + +#define DSLITE_INC_STATS_V4(PTR, COUNTER, IPV4_SRC_ADDR) { \ + PTR->COUNTER++; \ +} + +#define DSLITE_INC_STATS_V6(PTR, COUNTER, IPV6_DEST_ADDR) { \ + PTR->COUNTER++; \ +} + + +#define DSLITE_INVALID_UIDX 0xffff /*invalid svi app uidb index */ +#define DSLITE_INVALID_VRFID 0xffffffff /*invalid vrf id */ + +#define DSLITE_VRF_MASK 0x3fff +#define DSLITE_MAX_VRFMAP_ENTRIES (DSLITE_VRF_MASK + 1) + +#define DSLITE_VRFMAP_ENTRY_INVALID 0xffff + +#define DSLITE_V6_PREFIX_MASK_MIN 16 +#define DSLITE_V6_PREFIX_MASK_MAX 96 +#define DSLITE_V6_PREFIX_MASK_MULTIPLE 8 + +#define DSLITE_TUNNEL_MTU_MIN 1280 +#define DSLITE_TUNNEL_MTU_MAX 9216 + +#define DSLITE_TUNNEL_TTL_MIN 0 +#define DSLITE_TUNNEL_TTL_MAX 255 + +#define DSLITE_TUNNEL_TOS_MIN 0 +#define DSLITE_TUNNEL_TOS_MAX 255 + +#define DSLITE_V4_MASK_MAX 32 + +//#define XLAT_MAX_FRAG_ID_COUNTERS (256) +#define DSLITE_AFTR_IPV4_ADDR 0xC0000001 + +#define DSLITE_MAX_TAP_RG_ENTRIES 2 +#define DSLITE_MAX_DSLITE_ENTRIES (256) +#define DSLITE_MAX_DSLITE_ID (DSLITE_MAX_DSLITE_ENTRIES-1) +/* Define the below value as 64 if first 64 entries are for NAT44 */ +#define DSLITE_INDEX_OFFSET 1 + +#define DSLITE_INVALID_DSLITE_ID (0) + +#define DSLITE_TABLE_ENTRY_DELETED 0 +#define DSLITE_TABLE_ENTRY_ACTIVE 1 +#define DSLITE_TABLE_ENTRY_DORMANT 2 +#define DSLITE_TABLE_ENTRY_INVALID_UIDB 3 + +typedef struct { + u16 tcp_initial_setup_timeout; + u16 tcp_active_timeout; + u16 udp_init_session_timeout; + u16 udp_act_session_timeout; + u16 icmp_session_timeout; + u16 temp; +} dslite_timeout_info_t; + + +typedef struct { + + u16 state; /* To use nat44 enums ?? 
TBD */ + u16 dslite_id; /* DSLITE_ID value for this table entry - for easy access */ + + u16 i_vrf; /* V6 uidb index */ + u16 o_vrf; /* V4 uidb index */ + + u16 cnat_main_db_max_ports_per_user; /* port limit */ + u16 tcp_mss; /*tcp max segment size for this inside vrf */ + + u32 delete_time; + + cnat_portmap_v2_t *portmap_list; + + u32 nfv9_logging_index; + u32 syslog_logging_index; + u32 AFTR_v6_address[4]; + +#define DSLITE_IPV4_TOS_OVERRIDE_FLAG 0x00000001 +#define DSLITE_IPV6_TOS_OVERRIDE_FLAG 0x00000002 +#define DSLITE_IPV4_TTL_OVERRIDE_FLAG 0x00000004 +#define DSLITE_IPV6_TTL_OVERRIDE_FLAG 0x00000008 +#define DSLITE_IPV6_FRAG_REASSEMB_ENG 0x00000010 +#define DSLITE_FTP_ALG_ENABLE 0x00000020 +#define DSLITE_RTSP_ALG_ENABLE 0x00000040 +#define DSLITE_NETFLOW_ENABLE 0x00000080 +#define DSLITE_SYSLOG_ENABLE 0x00000100 + + u16 feature_flags; + u16 tunnel_mtu; + + u8 ipv4_ttl_value; + u8 ipv6_ttl_value; + u8 ipv4_tos_value; + u8 ipv6_tos_value; + + u32 v4_if_num; /* V4 SVI ifnum */ + u32 v6_if_num; /* V6 SVI ifnum */ + u32 i_vrf_id; //inside vrf id + u32 o_vrf_id; //outside vrf id + + dslite_timeout_info_t timeout_info; + u16 cnat_static_port_range; + u16 dyn_start_port; + + u32 AFTR_v4_addr; + bulk_alloc_size_t bulk_size; /* should be equivalent to u16 - 2 bytes */ + u32 pcp_server_addr; + u16 pcp_server_port; + u8 mapping_refresh_both_direction; + u8 pad; + u16 rtsp_port; +#define DSLITE_BIDIR_REFRESH 1 + u8 dslite_enable; /* DS-Lite enable check flag */ + u8 syslog_logging_policy; /* DS-Lite Session Logging check flag */ + u8 nf_logging_policy; + + u8 temp1; + u16 temp2; + u32 temp3; + u32 rseed_ip; +} dslite_table_entry_t; + +typedef struct { + u64 v4_to_v6_invalid_uidb_drop_count; + u64 v6_to_v4_invalid_uidb_drop_count; + u64 v4_to_v6_frag_invalid_uidb_drop_count; +} dslite_global_counters_t; + +typedef struct { + u32 tap_enable; + u32 ipv4_addr; + u32 ipv6_addr[4]; +} dslite_tap_rg_t; + +extern dslite_table_entry_t *dslite_table_db_ptr; + + +#define DSLITE_ADD_UIDB_INDEX_DSLITE_ID_MAPPING(uidb_index, dslite_id) \ + *(cgse_uidb_index_cgse_id_mapping_ptr + uidb_index) = dslite_id; + +extern u8 my_instance_number; + +extern void dslite_clear_counters(u16 dslite_id); +extern void dslite_clear_per_RG_counters(); +extern dslite_global_counters_t dslite_global_counters; +extern u32 dslite_config_debug_level; +extern u32 dslite_data_path_debug_level; +extern u32 dslite_defrag_debug_level; +extern u32 dslite_debug_level; + +typedef struct { + u64 v6_to_v4_tcp_input_count; + u64 v6_to_v4_tcp_nat_error; + u64 v6_to_v4_tcp_output_count; +} dslite_v6_to_v4_tcp_counter_t; + +typedef struct { + u64 v4_to_v6_tcp_input_count; + u64 v4_to_v6_tcp_no_entry; + u64 v4_to_v6_tcp_output_count; +} dslite_v4_to_v6_tcp_counter_t; + +typedef struct { + u64 v6_to_v4_udp_input_count; + u64 v6_to_v4_udp_nat_error; + u64 v6_to_v4_udp_output_count; +} dslite_v6_to_v4_udp_counter_t; + +typedef struct { + u64 v4_to_v6_udp_input_count; + u64 v4_to_v6_udp_no_entry; + u64 v4_to_v6_udp_output_count; +} dslite_v4_to_v6_udp_counter_t; + +typedef struct { + u64 v6_to_v4_icmp_qry_input_count; + u64 v6_to_v4_icmp_qry_nat_error; + u64 v6_to_v4_icmp_qry_output_count; +} dslite_v6_to_v4_icmp_qry_counter_t; + +typedef struct { + u64 v4_to_v6_icmp_qry_input_count; + u64 v4_to_v6_icmp_qry_no_nat_entry; + u64 v4_to_v6_icmp_qry_output_count; +} dslite_v4_to_v6_icmp_qry_counter_t; + +typedef struct { + u64 v6_to_v4_icmp_error_input_count; + u64 v6_to_v4_icmp_error_nat_error; + u64 v6_to_v4_icmp_error_output_count; +} 
dslite_v6_to_v4_icmp_error_counter_t; + +typedef struct { + u64 v4_to_v6_icmp_error_input_count; + u64 v4_to_v6_icmp_error_no_nat_entry; + u64 v4_to_v6_icmp_error_output_count; +} dslite_v4_to_v6_icmp_error_counter_t; + +typedef struct { + u64 v6_icmp_error_input_count; + u64 v6_AFTR_echo_reply_count; + u64 v6_to_v4_icmp_error_unsupported_type_drop_count; + u64 v6_to_v4_icmp_error_no_db_entry_count; + u64 v6_to_v4_icmp_err_throttled_count; + u64 v6_to_v4_icmp_error_xlated_count; +} dslite_v6_icmp_error_counter_t; + +typedef struct { + u64 v4_to_v6_ttl_gen_count; + u64 v4_to_v6_icmp_throttle_count; + u64 v4_to_v6_ptb_gen_count; + u64 v4_to_v6_aftr_v4_echo_reply_count; + u64 v6_to_v4_ttl_gen_count; + u64 v6_to_v4_icmp_throttle_count; + u64 v6_to_v4_admin_prohib_icmp_count; + u64 v6_to_v4_aftr_v4_echo_reply_count; + u64 v6_icmp_gen_count; +} dslite_icmp_gen_counter_t; + +typedef struct { + u64 dslite_input_tunnel_pkt; + u64 dslite_encap_count; + u64 dslite_decap_count; + u64 dslite_sec_check_failed; + u64 dslite_unsupp_packet; +} dslite_common_counter_t; + +typedef struct { + + dslite_v6_to_v4_tcp_counter_t v64_tcp_counters; + dslite_v4_to_v6_tcp_counter_t v46_tcp_counters; + dslite_v6_to_v4_udp_counter_t v64_udp_counters; + dslite_v4_to_v6_udp_counter_t v46_udp_counters; + dslite_v6_to_v4_icmp_qry_counter_t v64_icmp_counters; + dslite_v4_to_v6_icmp_qry_counter_t v46_icmp_counters; + dslite_v6_to_v4_icmp_error_counter_t v64_icmp_error_counters; + dslite_v4_to_v6_icmp_error_counter_t v46_icmp_error_counters; + dslite_v6_icmp_error_counter_t dslite_v6_icmp_err_counters; + dslite_icmp_gen_counter_t dslite_icmp_gen_counters; + dslite_common_counter_t dslite_common_counters; +} dslite_counters_t; + +typedef struct { + u32 active_translations; + u32 translation_create_rate; + u32 translation_delete_rate; + u32 in2out_forwarding_rate; + u32 out2in_forwarding_rate; + u32 in2out_drops_port_limit_exceeded; + u32 in2out_drops_system_limit_reached; + u32 in2out_drops_resource_depletion; + u32 no_translation_entry_drops; + u32 pool_address_totally_free; + u32 num_subscribers; + u32 dummy; + u64 drops_sessiondb_limit_exceeded; +} dslite_common_stats_t; + +typedef struct { + u16 msg_id; + u8 rc; + u8 pad[5]; + dslite_counters_t counters; +} dslite_show_statistics_summary_resp; + + +#define CMD_GENERATE_PTB 0x1 +#define CMD_GENERATE_TTL 0x2 + +/* + * This structure is to provide abstraction for data exchanged from one + * VPP node to its disposition or further in the dslite node graph. + */ +typedef struct { + u32 icmp_gen_type; // ctx->feature_data[0] + u32 reserved1; // ctx->feature_data[1] + u32 reserved2; // ctx->feature_data[2] + u32 reserved3; // ctx->feature_data[3] +} dslite_feature_data_t; + +extern dslite_counters_t dslite_all_counters[DSLITE_MAX_DSLITE_ENTRIES]; +//extern dslite_inst_gen_counter_t dslite_inst_gen_counters[DSLITE_MAX_DSLITE_ENTRIES]; + + + extern void dslite_show_config(void); +#define STAT_PORT_RANGE_FROM_INST_PTR(inst) ((inst)->cnat_static_port_range) + +#endif /* __DSLITE_DEFS_H__ */ + diff --git a/vnet/vnet/vcgn/index_list.c b/vnet/vnet/vcgn/index_list.c new file mode 100644 index 00000000000..ec1b83b0b30 --- /dev/null +++ b/vnet/vnet/vcgn/index_list.c @@ -0,0 +1,336 @@ +/* + *------------------------------------------------------------------ + * index_list.c - vector-index-based lists. 64-bit pointers suck. + * + * Copyright (c) 2008-2009, 2011 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <string.h> +//#include <clib_lite.h> +#include <vppinfra/vec.h> +#include "index_list.h" + +/* + * index_slist_addhead + * + * args: headp -- pointer to e.g. a hash bucket + * vector -- vector containing the list + * elsize -- size of an element in this vector + * offset -- offset in each vector element of this list thread + * index_to_add -- index in the vector to add to the list + * + * Adds new items to the head of the list. Try not to screw up the args! + */ +void index_slist_addhead (index_slist_t *headp, + u8 *vector, u32 elsize, u32 offset, u32 index_to_add) +{ + return (index_slist_addhead_inline(headp, vector, elsize, offset, + index_to_add)); +} + +/* + * index_slist_remelem + * + * args: headp -- pointer to e.g. a hash bucket + * vector -- vector containing the list + * elsize -- size of an element in this vector + * offset -- offset in each vector element of this list thread + * index_to_del -- index in the vector to delete from the list + * + * Try not to screw up the args! + */ + +int index_slist_remelem (index_slist_t *headp, + u8 *vector, u32 elsize, u32 offset, + u32 index_to_delete) +{ + return (index_slist_remelem_inline(headp, vector, elsize, offset, + index_to_delete)); +} + + +/* + * index_dlist_addtail + * + * Append the indicated vector element to the doubly-linked list + * whose first element is pointed to by headp. + * + * args: head_index -- listhead vector element index. + * vector -- vector containing the list + * elsize -- size of an element in this vector + * offset -- offset in each vector element of this list thread + * index_to_add -- index in the vector to add to the list + * + * Do not call this routine to create the listhead. Simply set + * index_dlist->next = index_dlist->prev = index of item. + * + * Try not to screw up the args. + */ + +void index_dlist_addtail (u32 head_index, u8 *vector, u32 elsize, + u32 offset, u32 index_to_add) +{ + index_dlist_t *elp; + index_dlist_t *elp_next; + index_dlist_t *headp; + + headp = (index_dlist_t *)(vector + offset + elsize*head_index); + elp = (index_dlist_t *)(vector + offset + elsize*index_to_add); + elp->next = index_to_add; + elp->prev = index_to_add; + + elp->next = headp->next; + headp->next = index_to_add; + + elp_next = (index_dlist_t *)(vector + offset + elsize*elp->next); + elp->prev = elp_next->prev; + elp_next->prev = index_to_add; +} + +u32 index_dlist_remelem (u32 head_index, + u8 *vector, u32 elsize, u32 offset, + u32 index_to_delete) +{ + u32 rv = head_index; + index_dlist_t *headp, *elp, *elp_next; + + elp = (index_dlist_t *)(vector + offset + elsize*index_to_delete); + + /* Deleting the head index? */ + if (PREDICT_FALSE(head_index == index_to_delete)) { + rv = elp->next; + /* The only element on the list? 
*/ + if (PREDICT_FALSE(rv == head_index)) + rv = EMPTY; + } + + headp = (index_dlist_t *)(vector + offset + elsize*elp->prev); + headp->next = elp->next; + elp_next = (index_dlist_t *)(vector + offset + elsize*elp->next); + elp_next->prev = elp->prev; + + elp->next = elp->prev = EMPTY; + + return rv; +} + + +#ifdef TEST_CODE2 + +typedef struct tv_ { + char junk[43]; + index_dlist_t l; +} tv_t; + + +void index_list_test_cmd(int argc, unsigned long *argv) +{ + int i, j; + u32 head_index; + index_dlist_t *headp; + tv_t *tp=0; + + vec_validate(tp, 3); + head_index = 3; + + memset(tp, 0xa, sizeof(tp[0])*vec_len(tp)); + + /* Here's how to set up the head element... */ + headp = &((tp + head_index)->l); + headp->next = headp->prev = head_index; + + for (i = 0; i < 3; i++) { + index_dlist_addtail(head_index, (u8 *)tp, sizeof(tp[0]), + STRUCT_OFFSET_OF(tv_t, l), i); + printf("headp next %d prev %d\n", + headp->next, headp->prev); + for (j = 0; j <= 3; j++) { + printf ("[%d]: next %d prev %d\n", j, + tp[j].l.next, tp[j].l.prev); + } + printf("---------------\n"); + + } + + printf("After all adds:\n"); + + printf("headp next %d prev %d\n", + headp->next, headp->prev); + + for (j = 0; j <= 3; j++) { + printf ("[%d]: next %d prev %d\n", j, + tp[j].l.next, tp[j].l.prev); + } + printf("---------------\n"); + + head_index = index_dlist_remelem (head_index, (u8 *)tp, sizeof(tp[0]), + STRUCT_OFFSET_OF(tv_t, l), 1); + + printf("after delete 1, head index %d\n", head_index); + headp = &((tp + head_index)->l); + printf("headp next %d prev %d\n", + headp->next, headp->prev); + for (j = 0; j <= 3; j++) { + printf ("[%d]: next %d prev %d\n", j, + tp[j].l.next, tp[j].l.prev); + } + printf("---------------\n"); + + index_dlist_addtail(head_index, (u8 *)tp, sizeof(tp[0]), + STRUCT_OFFSET_OF(tv_t, l), 1); + + printf("after re-add 1, head index %d\n", head_index); + headp = &((tp + head_index)->l); + printf("headp next %d prev %d\n", + headp->next, headp->prev); + for (j = 0; j <= 3; j++) { + printf ("[%d]: next %d prev %d\n", j, + tp[j].l.next, tp[j].l.prev); + } + printf("---------------\n"); + + for (i = 3; i >= 0; i--) { + head_index = index_dlist_remelem (head_index, (u8 *)tp, sizeof(tp[0]), + STRUCT_OFFSET_OF(tv_t, l), i); + printf("after delete, head index %d\n", head_index); + if (head_index != EMPTY) { + headp = &((tp + head_index)->l); + printf("headp next %d prev %d\n", + headp->next, headp->prev); + for (j = 0; j <= 3; j++) { + printf ("[%d]: next %d prev %d\n", j, + tp[j].l.next, tp[j].l.prev); + } + } else { + printf("empty list\n"); + } + printf("---------------\n"); + } +} +#endif /* test code 2 */ + +#ifdef TEST_CODE + +typedef struct tv_ { + char junk[43]; + index_slist_t l; +} tv_t; + + +void index_list_test_cmd(int argc, unsigned long *argv) +{ + int i, j; + tv_t *tp = 0; + index_slist_t *buckets = 0; + + vec_add1((u32 *)buckets, EMPTY); + vec_validate(tp, 9); + + for (i = 0; i < 10; i++) { + index_slist_addhead(buckets, (u8 *)tp, sizeof(*tp), + STRUCT_OFFSET_OF(tv_t, l), i); + } + + printf ("after adds, buckets[0] = %u\n", buckets[0]); + + for (j = 0; j < 10; j++) { + printf("tp[%d] next %u\n", j, tp[j].l); + + } + + for (i = 0; i < 10; i++) { + if (PREDICT_FALSE(index_slist_remelem(buckets, (u8 *) tp, sizeof(*tp), + STRUCT_OFFSET_OF(tv_t, l), i))) { + printf("OUCH: remelem failure at index %d\n", i); + } + if (PREDICT_FALSE(tp[i].l.next != EMPTY)) { + printf("OUCH: post-remelem next not EMPTY, index %d\n", i); + } + } + + printf ("after deletes, buckets[0] = %x\n", buckets[0]); + + for (i = 0; i 
< 10; i++) { + index_slist_addhead(buckets, (u8 *)tp, sizeof(*tp), + STRUCT_OFFSET_OF(tv_t, l), i); + } + + printf ("after adds, buckets[0] = %u\n", buckets[0]); + + for (j = 0; j < 10; j++) { + printf("tp[%d] next %u\n", j, tp[j].l); + + } + + for (i = 9; i >= 0; i--) { + if (PREDICT_FALSE(index_slist_remelem(buckets, (u8 *) tp, sizeof(*tp), + STRUCT_OFFSET_OF(tv_t, l), i))) { + printf("OUCH: remelem failure at index %d\n", i); + } + if ((tp[i].l.next != EMPTY)) { + printf("OUCH: post-remelem next not EMPTY, index %d\n", i); + } + } + + printf ("after deletes, buckets[0] = %x\n", buckets[0]); + + printf("add evens, then odds...\n"); + + for (i = 0; i < 10; i += 2) { + index_slist_addhead(buckets, (u8 *)tp, sizeof(*tp), + STRUCT_OFFSET_OF(tv_t, l), i); + + printf ("head = buckets[0].next = %d\n", buckets[0].next); + for (j = 0; j < 10; j++) { + printf("tp[%d] next %u\n", j, tp[j].l); + } + printf("-------------\n"); + } + + for (i = 1; i < 10; i += 2) { + index_slist_addhead(buckets, (u8 *)tp, sizeof(*tp), + STRUCT_OFFSET_OF(tv_t, l), i); + + printf ("head = buckets[0].next = %d\n", buckets[0].next); + for (j = 0; j < 10; j++) { + printf("tp[%d] next %u\n", j, tp[j].l); + } + printf("-------------\n"); + } + + printf ("after adds, buckets[0] = %u\n", buckets[0]); + + for (j = 0; j < 10; j++) { + printf("tp[%d] next %u\n", j, tp[j].l); + + } + + for (i = 9; i >= 0; i--) { + if (PREDICT_FALSE(index_slist_remelem(buckets, (u8 *) tp, sizeof(*tp), + STRUCT_OFFSET_OF(tv_t, l), i))) { + printf("OUCH: remelem failure at index %d\n", i); + } + if (PREDICT_FALSE(tp[i].l.next != EMPTY)) { + printf("OUCH: post-remelem next not EMPTY, index %d\n", i); + } + } + + printf ("after deletes, buckets[0] = %x\n", buckets[0]); + + vec_free(buckets); + vec_free(tp); +} +#endif /* test code */ diff --git a/vnet/vnet/vcgn/index_list.h b/vnet/vnet/vcgn/index_list.h new file mode 100644 index 00000000000..498cd7eb7ad --- /dev/null +++ b/vnet/vnet/vcgn/index_list.h @@ -0,0 +1,118 @@ +/* + *------------------------------------------------------------------ + * index_list.h - vector-index-based doubly-linked lists + * + * Copyright (c) 2008-2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef _INDEX_LIST_H_ +#define _INDEX_LIST_H_ 1 + +/* An index we can't possibly see in practice... */ +#define EMPTY ((u32)~0) + +typedef struct index_slist_ { + u32 next; +} index_slist_t; + +/* + * index_slist_addhead + * + * args: headp -- pointer to e.g. a hash bucket + * vector -- vector containing the list + * elsize -- size of an element in this vector + * offset -- offset in each vector element of this list thread + * index_to_add -- index in the vector to add to the list + * + * Adds new items to the head of the list. Try not to screw up the args! 
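+ *
+ * Illustrative usage sketch (not part of the original comment; 'el_t' and
+ * 'pool' are hypothetical stand-ins for an element type embedding an
+ * index_slist_t member named 'link' and a vector of such elements -- the
+ * TEST_CODE blocks in index_list.c follow the same pattern with tv_t):
+ *
+ *   index_slist_t head;
+ *   head.next = EMPTY;
+ *   index_slist_addhead_inline (&head, (u8 *) pool, sizeof (pool[0]),
+ *                               STRUCT_OFFSET_OF (el_t, link), 5);
+ *   head.next is now 5; pool[5].link.next points at the old head (or EMPTY).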
+ */ +static inline void + index_slist_addhead_inline (index_slist_t *headp, + u8 *vector, u32 elsize, + u32 offset, u32 index_to_add) +{ + index_slist_t *addme; + + addme = (index_slist_t *)(vector + offset + elsize*index_to_add); + addme->next = EMPTY; + + if (headp->next == EMPTY) { + headp->next = index_to_add; + return; + } else { + addme->next = headp->next; + headp->next = index_to_add; + } +} + +/* + * index_slist_remelem + * + * args: headp -- pointer to e.g. a hash bucket + * vector -- vector containing the list + * elsize -- size of an element in this vector + * offset -- offset in each vector element of this list thread + * index_to_del -- index in the vector to delete from the list + * + * Try not to screw up the args! + */ + +static inline int + index_slist_remelem_inline (index_slist_t *headp, + u8 *vector, u32 elsize, + u32 offset, u32 index_to_delete) +{ + index_slist_t *findme; + index_slist_t *prev; + index_slist_t *cur; + + findme = (index_slist_t *)(vector + offset + elsize*index_to_delete); + + if (headp->next == index_to_delete) { + headp->next = findme->next; + findme->next = EMPTY; + return 0; + } + + prev = (index_slist_t *)(vector + offset + elsize*headp->next); + cur = (index_slist_t *)(vector + offset + elsize*prev->next); + while (cur != findme) { + if (cur->next == EMPTY) + return (1); + prev = cur; + cur = (index_slist_t *)(vector + offset + elsize*cur->next); + } + prev->next = findme->next; + findme->next = EMPTY; + return 0; +} + +void index_slist_addhead (index_slist_t *headp, + u8 *vector, u32 elsize, u32 offset, u32 index); +int index_slist_remelem (index_slist_t *headp, + u8 *vector, u32 elsize, u32 offset, u32 index); + +typedef struct index_dlist_ { + u32 next; + u32 prev; +} index_dlist_t; + +void index_dlist_addtail (u32 head_index, u8 *vector, u32 elsize, + u32 offset, u32 index_to_add); + +u32 index_dlist_remelem (u32 head_index, + u8 *vector, u32 elsize, u32 offset, + u32 index_to_delete); +#endif /* _INDEX_LIST_H_ */ diff --git a/vnet/vnet/vcgn/nat64_db.h b/vnet/vnet/vcgn/nat64_db.h new file mode 100644 index 00000000000..837464f6940 --- /dev/null +++ b/vnet/vnet/vcgn/nat64_db.h @@ -0,0 +1,480 @@ +/* + *------------------------------------------------------------------ + * nat64_db.h - Stateful NAT64 translation database definitions + * + * Copyright (c) 2010-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ +#ifndef __NAT64_DB_H__ +#define __NAT64_DB_H__ + +#include "cnat_cli.h" +#include "index_list.h" +#include "cnat_ports.h" +#include "cnat_db.h" +#include "nat64_defs.h" +#include "cnat_bulk_port_defs.h" + +nat64_vrfmap_t *nat64_map_by_vrf; + +#define SESSION_OPT + +#define HASH_ENHANCE 4 + + +#define NAT64_MAIN_DB_SIZE \ + (PLATFORM_NAT64_MAX_SESSIONS / PLATFORM_CNAT_INSTS) +#define NAT64_MAIN_HASH_SIZE \ + (HASH_ENHANCE * PLATFORM_CNAT_MAIN_PRELIM_HASH_SIZE) + +#define NAT64_MAIN_HASH_MASK (NAT64_MAIN_HASH_SIZE-1) + + +/* nb: 200000 users / 64 CNAT = 3125, 76% occupancy */ +#define NAT64_USER_HASH_SIZE CNAT_USER_HASH_SIZE +#define NAT64_USER_HASH_MASK (NAT64_USER_HASH_SIZE-1) + +/* Number of sessions per BIB entry/NAT64 translation + - nsessions is u16 type. So selected 0xFFFF + - Ideally Sessions per transltion will not reach the limit + - Only DoS can possible. It can take care of it */ +#define NAT64_MAX_SESSIONS_PER_BIB 0xFFFF + +/* No. of per ip/port config will be limited to 1000 */ +/* totally 25K across all instances) */ +#define NAT64_TIMEOUT_HASH_SIZE \ + PLATFORM_NAT64_TIMEOUT_HASH_SIZE + +#define NAT64_TIMEOUT_HASH_MASK (NAT64_TIMEOUT_HASH_SIZE - 1) +#define NAT64_TIMEOUT_FULL_MASK 0xFFFFFFFFFFFFFFFF + + +#define FORCE_DEL 1 /* Delete static BIB entries as well */ + +/* default timeout values */ +#define NAT64_UDP_DEFAULT 300 /* 5 min */ +#define NAT64_UDP_MIN 120 /* 2 min */ +#define NAT64_TCP_TRANS 240 /* 4 min */ +#define NAT64_TCP_EST 7200 /* 2 hrs */ +#define NAT64_TCP_V4_SYN 6 /* 6 sec */ +#define NAT64_FRAG_MIN 2 /* 2 sec */ +#define NAT64_ICMP_DEFAULT 60 /* 1 min */ + + +#define NAT64_V6_GET_HASH(in_key, hash, mask) \ + a = in_key->ipv6[0] ^ in_key->ipv6[1] ^ in_key->ipv6[2] ^ in_key->ipv6[3] \ + ^ ((in_key->port << 16) | in_key->vrf); \ + b = c = 0x9e3779b9;\ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix32(a, b, c); \ + hash = c & mask; \ + + +#define NAT64_V4_GET_HASH(in_key, hash, mask) \ + a = in_key.ipv4 ^ ((in_key.port << 16) | in_key.vrf); \ + b = c = 0x9e3779b9; \ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix32(a, b, c); \ + hash = c & mask; + + + +#define NAT64_V6_GET_SESSION_HASH(bib_index, in_addr, port, vrf, hash, mask) \ + a = bib_index ^ in_addr[0] ^ in_addr[1] ^ in_addr[2] ^ in_addr[3] \ + ^ port ^ vrf; \ + b = c = 0x9e3779b9; \ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix32(a, b, c); \ + hash = c & mask; + +#define NAT64_V4_GET_SESSION_HASH(bib_index, in_addr, port, vrf, hash, mask) \ + a = bib_index ^ in_addr ^ port ^ vrf; \ + b = c = 0x9e3779b9; \ + /* Jenkins hash, arbitrarily use c as the "answer" */ \ + hash_mix32(a, b, c); \ + hash = c & mask; + + +extern index_slist_t *nat64_bib_out2in_hash; +extern index_slist_t *nat64_bib_in2out_hash; +extern index_slist_t *nat64_bib_user_hash; +extern index_slist_t *nat64_session_out2in_hash; +#ifndef SESSION_OPT +extern index_slist_t *nat64_session_in2out_hash; +#endif +extern index_slist_t *nat64_frag_out2in_hash; +extern index_slist_t *nat64_frag_in2out_hash; +extern index_slist_t *nat64_timeout_hash; + + +/* + * nat64_ bib_entry_t + * This structure depicts Binding Information Base of NAT64 sessions. + * It stores information about the inside v6 source transport address and + * corresponding outside v4 source transport address for each protocol. 
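+ *
+ * Each entry is threaded onto two hash chains -- nat64_bib_in2out_hash
+ * (keyed on v6_in_key) and nat64_bib_out2in_hash (keyed on v4_out_key) --
+ * so a binding can be located from either direction. Sessions created
+ * against the binding are chained from session_head_index, with nsessions
+ * bounded by NAT64_MAX_SESSIONS_PER_BIB.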
+ */ + +typedef struct { + + index_slist_t nat64_bib_out2in_hash; + index_slist_t nat64_bib_in2out_hash; + + /* 0x08 */ + u16 flags; /* flags in cnat_db.h (cnat_main_db_entry_t) */ +#define NAT64_DB_FLAG_STATIC_PORT CNAT_DB_FLAG_STATIC_PORT +#define NAT64_DB_NAT64_FLAG CNAT_DB_NAT64_FLAG +#define NAT64_DB_FLAG_ALG_ENTRY CNAT_DB_FLAG_ALG_ENTRY +#define NAT64_DB_FLAG_PCPI CNAT_DB_FLAG_PCPI +#define NAT64_DB_FLAG_PCPE CNAT_DB_FLAG_PCPE + + /* 0x0A */ + u16 nat64_inst_id; + /* 0x0C */ + u32 user_index; + + /* 0x10 */ + nat64_v4_key_t v4_out_key; + + /* 0x18 */ + nat64_v6_key_t v6_in_key; + + /* 0x2C */ + index_dlist_t user_ports; + /* 0x34 */ + u32 session_head_index; + /* 0x38 - 56B*/ + u16 nsessions; + u16 pad2; + + /* 0x3C - 60B */ + u32 in2outpkts; + u32 out2inpkts; + /* 0x44 - 68B */ + + /* 0x42 - 70B */ + union { /* used by FTP ALG, pkt len delta due to FTP PORT cmd */ + u16 delta; + i8 alg_dlt[2]; /* two delta values, 0 for previous, 1 for current */ + u16 il; /* Used to indicate if interleaved mode is used + in case of RTSP ALG */ + } alg; + + u16 temp1; + + u32 entry_expires; + + u32 temp3; + /* unused, temp1 ,temp2 and temp3 put to make it in sync with nat44 main db entry size */ + /* size of = 0x54 = 84 B */ + u32 unused; + +} nat64_bib_entry_t ; + +/* + * nat64_bib_user_entry_t + * This structure stores information about translations of a particular user + * (User here refers to a same inside source address) + */ +typedef struct { + /* 0x00 */ + index_slist_t user_hash; + /* 0x04 */ + u16 ntranslations; + /* 0x06 */ + u8 icmp_msg_count; + /* 0x07 */ + u8 flags; +#define NAT64_USER_DB_NAT64_FLAG CNAT_USER_DB_NAT64_FLAG + + /* 0x08 */ + u32 translation_list_head_index; + /* 0x0C */ + u32 portmap_index; + /* 0x10 */ + nat64_v6_key_t v6_in_key; + /* 0x24 = 36 B */ + + u32 align1; /* Make it 8B boundary and in sync with nat44 user db entry size */ +#ifndef NO_BULK_LOGGING + /* size of = 0x28 = 40 B */ + /* Now adding 8 more bytes for bulk allocation.. This makes it + * 0x30 (48). For nat64 stful, we may support bulk allocation + * later */ + /* Indicates the currently used bulk port range */ + i16 bulk_port_range_cache[BULK_RANGE_CACHE_SIZE]; +#endif /* NO_BULK_LOGGING */ +} nat64_bib_user_entry_t; + +/* + * nat64_session_entry_t + * This structure represents the session table. It maintains the information + * about the flow of the packets. It would consist of source and destination + * (inside and outside) ipv4 and ipv4 transport addresses. 
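+ *
+ * Note that with SESSION_OPT defined (as it is above), only the out2in hash
+ * thread and the outside v4 destination key are kept per session; the inside
+ * (v6) half of the flow is reached through the owning BIB entry via
+ * bib_index instead of being duplicated here.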
+ */ +typedef struct { + + /* 0x00 */ + index_slist_t nat64_session_out2in_hash; + + /* 0x04 */ + u32 bib_index; /* would point to v4/v6 src transport address */ + + /* 0x08 */ + nat64_v4_key_t v4_dest_key; + +#ifndef SESSION_OPT + index_slist_t nat64_session_in2out_hash; + nat64_v6_key_t v6_dest_key; +#endif + + /* 0x10 */ + u16 flags;/* Will be used for flags same as nat44 session */ + + /* 0x12 */ + u16 timeout; + + /* 0x14 */ + u32 entry_expires; + /* 0x18 */ + index_dlist_t bib_list; + /* 0x20 = 32 B */ + + union { /* alg same as cnat_main_db_t */ + u16 delta; + i8 alg_dlt[2]; + u16 il; + } alg; + + /* 0x22 */ + u16 tcp_flags; /* Mainly TCP events - check nat64_tcp_sm.h */ + + /* 0x24 */ + u32 tcp_seq_num; + + /* 0x28 */ /* unused1, unused2 and unused3 are put to make it in sync with + * cnat_session_db */ + u32 unused1; + + /* 0x2C */ + u32 unused2; + + /* 0x30 */ + u16 unused3; + + /* 0x32 - 50B */ + +} nat64_session_entry_t; + +/* + * nat64_session_tcp_init_entry_t + * This structure will be used to store information about v4 initiation + * tcp entries. + */ +typedef struct { + nat64_v6_key_t v6_in_key; + nat64_v4_key_t v4_out_key; +} nat64_session_tcp_init_entry_t; + +/* + * nat64_in_v6_frag_entry_t + * This structure will be used to store information about fragment flows + * that are coming from inside v6 hosts. + */ +typedef struct { + index_slist_t nat64_frag_in2out_hash; + + u32 v6_src_addr[4]; + u32 v6_destn_addr[4]; + u32 frag_iden; + u16 vrf; + u16 pad1; +} nat64_in_v6_frag_entry_t ; + +/* + * nat64_out_v4_frag_entry_t + * This structure will be used to store information about fragment flows + * that are coming from outside v4 machines. + */ +typedef struct { + index_slist_t nat64_frag_out2in_hash; + + u32 v4_src_addr; + u32 v4_destn_addr; + u16 frag_iden; + u16 vrf; +} nat64_out_v4_frag_entry_t ; + +/* + * nat64_timeout _t + * These following structures will be used to store information destination + * timeouts configured. 
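+ *
+ * Concretely: each entry maps an outside v4 transport address
+ * (nat64_v4_key_t) to a configured timeout value and is looked up through
+ * nat64_timeout_hash, giving per-destination overrides of the default
+ * protocol timeouts defined above.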
+ */ +typedef struct { + nat64_v4_key_t timeout_key; + u16 timeout_value; +} nat64_timeout_t; + +/* + * nat64_timeout_db_entry_t + */ +typedef struct { + nat64_timeout_t t_key; + index_slist_t t_hash; +} nat64_timeout_db_entry_t; + + +typedef union { + cnat_main_db_entry_t nat44_main_db; + nat64_bib_entry_t nat64_bib_db; +} cgse_nat_db_entry_t; + +typedef union { + cnat_session_entry_t nat44_session_db; + nat64_session_entry_t nat64_session_db; +} cgse_nat_session_db_entry_t; + +typedef union { + cnat_user_db_entry_t nat44_user_db; + nat64_bib_user_entry_t nat64_user_db; +} cgse_nat_user_db_entry_t; + +extern index_slist_t *nat64_bib_out2in_hash; +extern index_slist_t *nat64_bib_in2out_hash; +extern index_slist_t *nat64_bib_user_hash; +extern index_slist_t *nat64_session_out2in_hash; +extern index_slist_t *nat64_session_in2out_hash; +extern index_slist_t *nat64_frag_out2in_hash; +extern index_slist_t *nat64_frag_in2out_hash; +extern index_slist_t *nat64_timeout_hash; + +extern nat64_bib_entry_t *nat64_bib_db; +extern nat64_bib_user_entry_t *nat64_bib_user_db; +extern nat64_session_entry_t *nat64_session_db; +extern nat64_in_v6_frag_entry_t *nat64_in_frag_db; +extern nat64_out_v4_frag_entry_t *nat64_out_frag_db; +extern nat64_session_tcp_init_entry_t *nat64_tcp_init_db ; +extern nat64_timeout_db_entry_t *nat64_timeout_db; + +extern nat64_table_entry_t nat64_table_array[NAT64_MAX_NAT64_ENTRIES]; +extern nat64_table_entry_t *nat64_table_ptr; + +extern cgse_nat_db_entry_t *cgse_nat_db; +extern cgse_nat_user_db_entry_t *cgse_user_db; +extern cgse_nat_session_db_entry_t *cgse_session_db; + +void nat64_bib_user_db_delete (nat64_bib_user_entry_t *up); + +nat64_bib_user_entry_t* +nat64_bib_user_db_create_entry(nat64_v6_key_t *uki, u32 bucket, + u32 portmap_index); + +nat64_bib_user_entry_t* +nat64_bib_user_db_lookup_entry(nat64_v6_key_t *uki, u32 *bucket); + + +nat64_bib_entry_t* +nat64_bib_db_lookup_entry(nat64_v6_key_t *ki); + +void nat64_bib_db_in2out_hash_delete (nat64_bib_entry_t *ep); + +void nat64_bib_db_out2in_hash_delete (nat64_bib_entry_t *ep); + +nat64_bib_entry_t * +nat64_create_bib_db_entry_and_hash(nat64_v6_key_t *ki, + nat64_v4_key_t *ko, + nat64_bib_user_entry_t *udb); + + +void nat64_delete_bib_db_entry (nat64_bib_entry_t *ep, u8 force); + +nat64_bib_entry_t * +nat64_bib_db_lookup_entry_out2in (nat64_v4_key_t *ko); + +nat64_bib_entry_t * +nat64_get_bib_db_entry (nat64_v6_key_t *ki, + port_pair_t port_pair_type, + port_type_t port_type, + cnat_gen_icmp_info *info); + + +nat64_bib_entry_t* +nat64_create_static_bib_db_entry (nat64_v6_key_t *ki, + nat64_v4_key_t *ko, + nat64_table_entry_t *my_table, + cnat_gen_icmp_info *info); + + + +//void nat64_session_db_in2out_hash_delete (nat64_session_entry_t *ep); +void nat64_session_db_out2in_hash_delete (nat64_session_entry_t *ep); + +/*nat64_session_entry_t * +nat64_session_db_lookup_entry(nat64_v6_key_t *ki, u32 bib_index); */ + + +nat64_session_entry_t * +nat64_session_db_lookup_entry_out2in (nat64_v4_key_t *ko,u32 bib_index); + +/* +nat64_session_entry_t * +nat64_create_session_db_entry(nat64_v6_key_t *ki, + nat64_v4_key_t *ko, + nat64_bib_entry_t *bdb); +*/ +nat64_session_entry_t * +nat64_create_session_db_entry_v2( nat64_v4_key_t *ko, + nat64_bib_entry_t *bdb); + + +//void nat64_delete_session_db_entry (nat64_session_entry_t *ep); +void nat64_delete_session_db_entry_v2 (nat64_session_entry_t *ep, u8 force); + +u32 nat64_timeout_db_hash_lookup (nat64_v4_key_t t_key); + +u16 query_and_update_db_timeout_nat64(nat64_session_entry_t *db); + 
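+/*
+ * Illustrative calling sequence for the destination-timeout API declared
+ * here (a sketch only, not project documentation; 'key' is assumed to be a
+ * populated nat64_v4_key_t and 'sdb' a nat64_session_entry_t pointer):
+ *
+ *   nat64_timeout_t t;
+ *   t.timeout_key   = key;      timeout applies to this outside v4 address
+ *   t.timeout_value = 120;      seconds
+ *   (void) nat64_timeout_db_create (t);
+ *   ...
+ *   u16 to = query_and_update_db_timeout_nat64 (sdb);
+ *   ...
+ *   nat64_timeout_db_delete (t.timeout_key);
+ */
+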
+void nat64_timeout_db_hash_add (nat64_timeout_db_entry_t *t_entry); + +u16 nat64_timeout_db_create (nat64_timeout_t t_entry); + +void nat64_timeout_db_delete(nat64_v4_key_t t_key); + +#define NAT64_CMP_V6_KEY(key1, key2) \ + memcmp(key1, key2, sizeof(nat64_v6_key_t)) + +#define NAT64_CMP_V4_KEY(key1, key2) \ + memcmp(key1, key2, sizeof(nat64_v4_key_t)) + + +#define NAT64_CMP_V6_IP(ip1, ip2) \ + memcmp(ip1, ip2, (sizeof(u32) * 4)) + + +#define NAT64_CMP_V6_KEY1(key1, key2) \ + (key1.ipv6[0] == key2.ipv6[0]) && (key1.ipv6[1] == key2.ipv6[1]) && \ + (key1.ipv6[2] == key2.ipv6[2]) && (key1.ipv6[3] == key2.ipv6[3]) && \ + (key1.port == key2.port) && (key1.vrf == key2.vrf) + + +#define NAT64_CMP_V6_IP1(ip1, ip2) \ + ((ip1[0] == ip2[0]) && (ip1[1] == ip2[1]) && \ + (ip1[2] == ip2[2]) && (ip1[3] == ip2[3])) + +#define NAT64_CMP_V4_KEY1(key1, key2) \ + (key1.key64 == key2.key64) + + +extern u8 nat64_timeout_dirty_flag[NAT64_MAX_NAT64_ENTRIES]; + +#endif diff --git a/vnet/vnet/vcgn/nat64_defs.h b/vnet/vnet/vcgn/nat64_defs.h new file mode 100644 index 00000000000..47e431a7462 --- /dev/null +++ b/vnet/vnet/vcgn/nat64_defs.h @@ -0,0 +1,576 @@ +/* + *------------------------------------------------------------------ + * nat64_defs.h - NAT64 structure definiitions + * + * Copyright (c) 2007-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __NAT64_DEFS_H__ +#define __NAT64_DEFS_H__ + +#ifdef TOBE_PORTED +#include "spp_platform_common.h" +#include "cgse_defs.h" +#include "xlat_defs.h" +#endif +#include "cnat_cli.h" +#include "cnat_ports.h" +#include "tcp_header_definitions.h" +#include "nat64_tcp_sm.h" +#include "cnat_db.h" + +#define NAT64_MAX_FRAG_ID_COUNTERS (256) + +#define NAT64_MAX_NAT64_ENTRIES 500 + +#define NAT64_MAX_ID (NAT64_MAX_NAT64_ENTRIES-1) + +#define NAT64_INVALID_ID (0) + +#define NAT64_MAX_CFG_INSTANCES 64 + +#define NAT64_TABLE_ENTRY_DELETED 0 +#define NAT64_TABLE_ENTRY_ACTIVE 1 +#define NAT64_TABLE_ENTRY_DORMANT 2 +#define NAT64_TABLE_ENTRY_INVALID_UIDB 3 + +#define NAT64_MAX_TRANSLATION_ENTRIES PLATFORM_MAX_TRANSLATION_ENTRIES + +#define NAT64_WKP_PREFIX_LEN 96 +#define NAT64_WKP_PREFIX_0 0x0064FF9B +#define NAT64_WKP_PREFIX_1 0x00000000 +#define NAT64_WKP_PREFIX_2 0x00000000 +#define NAT64_WKP_PREFIX_3 0x00000000 + + +/* Reset the expiry time only if it is not 0 +** if it is 0 - then queue for delete by clear command +**/ + +#define NAT64_TIMEOUT_RST(db) \ + if(PREDICT_TRUE(db->entry_expires !=0 )) \ + db->entry_expires = cnat_current_time; + +extern u32 nat64_config_debug_level; +extern u32 nat64_data_path_debug_level; + +extern u32 nat64_translation_create_count[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_translation_delete_count[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_translation_create_rate[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_translation_delete_rate[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_in2out_forwarding_count[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_in2out_forwarding_rate[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_out2in_forwarding_count[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_out2in_forwarding_rate[NAT64_MAX_NAT64_ENTRIES]; + +extern u32 nat64_translation_create_count_old[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_translation_delete_count_old[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_in2out_forwarding_count_old[NAT64_MAX_NAT64_ENTRIES]; +extern u32 nat64_out2in_forwarding_count_old[NAT64_MAX_NAT64_ENTRIES]; + +extern u16 *nat64_frag_id_counter_ptr; + +typedef struct { + u64 v6_to_v4_tcp_input_count; + u64 v6_to_v4_tcp_non_translatable_drop_count; + u64 v6_to_v4_tcp_state_drop_count; + u64 v6_to_v4_tcp_no_db_drop_count; + u64 v6_to_v4_tcp_output_count; +} nat64_v6_to_v4_tcp_counter_t; + +typedef struct { + u64 v4_to_v6_tcp_input_count; + u64 v4_to_v6_tcp_no_db_drop_count; + u64 v4_to_v6_tcp_v4_init_policy_drop_count; + u64 v4_to_v6_tcp_state_drop_count; + u64 v4_to_v6_tcp_output_count; + u64 v4_to_v6_tcp_filter_drop_count; +} nat64_v4_to_v6_tcp_counter_t; + +typedef struct { + u64 v6_to_v4_udp_input_count; + u64 v6_to_v4_udp_non_translatable_drop_count; + u64 v6_to_v4_udp_no_db_drop_count; + u64 v6_to_v4_udp_output_count; + u64 v6_to_v4_udp_checksum_zero_count; +} nat64_v6_to_v4_udp_counter_t; + +typedef struct { + u64 v4_to_v6_udp_input_count; + u64 v4_to_v6_udp_no_db_drop_count; + u64 v4_to_v6_udp_filter_drop_count; + u64 v4_to_v6_udp_output_count; + u64 v4_to_v6_udp_crc_zero_drop_count; + u64 v4_to_v6_udp_frag_crc_zero_drop_count; + u64 v4_to_v6_udp_crc_zero_recycle_sent_count; + u64 v4_to_v6_udp_crc_zero_recycle_drop_count; +} nat64_v4_to_v6_udp_counter_t; + +typedef struct { + u64 v6_to_v4_icmp_input_count; + u64 v6_to_v4_icmp_no_db_drop_count; + u64 v6_to_v4_icmp_non_translatable_drop_count; + u64 v6_to_v4_icmp_qry_output_count; +} nat64_v6_to_v4_icmp_counter_t; + +typedef struct 
{ + u64 v4_to_v6_icmp_input_count; + u64 v4_to_v6_icmp_no_db_drop_count; + u64 v4_to_v6_icmp_filter_drop; + u64 v4_to_v6_icmp_qry_output_count; +} nat64_v4_to_v6_icmp_counter_t; + +typedef struct { + u64 v6_to_v4_icmp_error_input_count; + u64 v6_to_v4_icmp_error_no_db_drop_count; + u64 v6_to_v4_icmp_error_invalid_next_hdr_drop_count; + u64 v6_to_v4_icmp_error_non_translatable_drop_count; + u64 v6_to_v4_icmp_error_unsupported_type_drop_count; + u64 v6_to_v4_icmp_error_output_count; +} nat64_v6_to_v4_icmp_error_counter_t; + +typedef struct { + u64 v4_to_v6_icmp_error_input_count; + u64 v4_to_v6_icmp_error_no_db_drop_count; + u64 v4_to_v6_icmp_error_unsupported_type_drop_count; + u64 v4_to_v6_icmp_error_unsupported_protocol_drop_count; + u64 v4_to_v6_icmp_error_output_count; +} nat64_v4_to_v6_icmp_error_counter_t; + + + +typedef struct { + u64 nat64_v4_frag_input_count; + u64 nat64_v4_frag_forward_count; + u64 nat64_v4_frag_drop_count; + u64 nat64_v4_frag_throttled_count; + u64 nat64_v4_frag_timeout_drop_count; + u64 nat64_v4_frag_tcp_input_count; + u64 nat64_v4_frag_udp_input_count; + u64 nat64_v4_frag_icmp_input_count; + + u64 nat64_v6_frag_input_count; + u64 nat64_v6_frag_forward_count; + u64 nat64_v6_frag_drop_count; + u64 nat64_v6_frag_throttled_count; + u64 nat64_v6_frag_timeout_drop_count; + u64 nat64_v6_frag_tcp_input_count; + u64 nat64_v6_frag_udp_input_count; + u64 nat64_v6_frag_icmp_input_count; + u64 nat64_v6_frag_invalid_input_count; +} nat64_frag_counter_t; + +typedef struct { + u64 v6_to_v4_options_input_count; + u64 v6_to_v4_options_drop_count; + u64 v6_to_v4_options_forward_count; + u64 v6_to_v4_options_no_db_drop_count; + u64 v6_to_v4_unsupp_proto_count; + + u64 v4_to_v6_options_input_count; + u64 v4_to_v6_options_drop_count; + u64 v4_to_v6_options_forward_count; + u64 v4_to_v6_options_no_db_drop_count; + u64 v4_to_v6_unsupp_proto_count; +} nat64_options_counter_t; + +typedef struct { + u64 v4_icmp_gen_count; + u64 v6_icmp_gen_count; +} nat64_icmp_gen_counter_t; + +typedef struct{ + u32 nat64_num_translations; + u32 nat64_num_dynamic_translations; + u32 nat64_num_static_translations; + u32 nat64_sessions; + u64 nat64_port_limit_exceeded; + u64 nat64_system_limit_reached; + u64 nat64_resource_depletion_drops; + u64 nat64_no_translation_entry_drops; + u64 nat64_filtering_drops ; + u64 nat64_invalid_ipv6_prefix_drops; + u32 num_subscribers; + u32 dummy; + u64 drops_sessiondb_limit_exceeded; +} nat64_inst_gen_counter_t; + +typedef struct { + + nat64_v6_to_v4_tcp_counter_t v64_tcp_counters; + nat64_v4_to_v6_tcp_counter_t v46_tcp_counters; + nat64_v6_to_v4_udp_counter_t v64_udp_counters; + nat64_v4_to_v6_udp_counter_t v46_udp_counters; + nat64_v6_to_v4_icmp_counter_t v64_icmp_counters; + nat64_v4_to_v6_icmp_counter_t v46_icmp_counters; + nat64_v6_to_v4_icmp_error_counter_t v64_icmp_error_counters; + nat64_v4_to_v6_icmp_error_counter_t v46_icmp_error_counters; + nat64_frag_counter_t nat64_frag_counters; + nat64_options_counter_t nat64_options_counters; + nat64_icmp_gen_counter_t nat64_icmp_gen_counters; + +} nat64_counters_t; + +/* + * nat64_portmap_v2_t + * This structure stores information about the IP address and ports + * available for NAT for this nat64 instance. 
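+ *
+ * The bm[] member below is a bitmap sized to BITS_PER_INST bits rounded up
+ * to whole uwords (presumably one bit per allocatable port of ipv4_address,
+ * which is stored in native bit order).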
+ */ + +typedef struct { + u32 delete_time; + u32 last_sent_timestamp; + u32 inuse; + u32 ipv4_address; /* native bit order */ + uword bm[(BITS_PER_INST + BITS(uword)-1)/BITS(uword)]; +} nat64_portmap_t; + +/* + * nat64_v4_db_key_t + * This structure gives information about the v4 transport address + * (ipv4, port, protocol) + */ +typedef struct { + u32 ipv4; + u16 port; + u16 vrf; //bit0-12:inst_id, bit13:unused, bit14-15:protocol +} nat64_v4_db_key_t; + +/* Union will be easier while compare/hash */ +typedef union { + nat64_v4_db_key_t k; + u64 key64; +} nat64_v4_key_t; +/* + * nat64_v6_db_key_t + * This structure gives information about the v6 transport address + * (ipv6, port, protocol) + */ +typedef struct { + u32 ipv6[4]; + u16 port; + u16 vrf; //bit0-12:inst_id, bit13:unused, bit14-15:protocol +} nat64_v6_key_t; + + +typedef struct { + u16 udp_timeout; + u16 tcp_trans_timeout; + u16 tcp_est_timeout; + u16 tcp_v4_init_timeout; + u16 frag_timeout; + u16 icmp_timeout; +} nat64_timeout_info_t; + +#define NAT64_UDP_DEF 300 /* 5min */ +#define NAT64_TCP_TRANS_DEF 240 /* 4min */ +#define NAT64_TCP_EST_DEF 7200 /* 2Hrs */ +#define NAT64_TCP_V4_DEF 6 /* 6 sec */ +#define NAT64_FRAG_DEF 2 /* 2 sec */ +#define NAT64_ICMP_DEF 60 /* 60 sec */ + +/* + * nat64_table_entry_t + * This structure is used to store information regarding every nat64 instance. + */ + +/* structure will hold the L4 information, of a particular frag stream set + * src_port - holds the original src port + * dst_port - holds the original dst port + * total_len - useful only in ICMP nodes + * cnat_port - vlaue used for looksups + * next_prot - Protocol after translation */ + +typedef struct l4_frag_info { + u16 next_node_idx; + u16 src_port; + u16 dst_port; + u16 total_length; + u8 protocol; + u16 cnat_prot; + u16 next_prot; +} l4_frag_info_t; + +typedef struct { + u16 state; + u16 nat64_id; /* nat64_id value for this table entry - for easy access */ + + u16 v4_uidb_index; /* V4 uidb index */ + u16 v6_uidb_index; /* V6 uidb index */ + + u8 octet0_position; + u8 octet1_position; + u8 octet2_position; + u8 octet3_position; + + u16 v4_to_v6_tcp_mss; /* TCP MSS */ + u16 v6_to_v4_tcp_mss; /* TCP MSS */ + + /* + * V6 NAT64 prefix value and mask size + */ + u32 v6_prefix[4]; + u32 v6_prefix_mask[4]; + + u8 v6_prefix_mask_len; + u8 ubits_reserved_on; +#define IPV4_TOS_OVERRIDE_FLAG 0x1 +#define IPV6_TOS_OVERRIDE_FLAG 0x2 +#define NAT64_STFUL_RTSP_ALG_ENABLE 0x4 + u8 feature_flags; + + u8 ipv4_tos_value; + u8 ipv6_tos_value; + u8 df_bit_clear; + u8 ipv6_mtu_set; + + u8 filtering_policy; +#define NAT64_ADDRESS_DEPENDENT_ENABLE 1 + u8 tcp_policy; +#define NAT64_TCP_SECURITY_FLAG_DISABLE 1 + u8 ftp_flags; + + u8 tcp_v4_init_enable; +#define NAT64_TCP_V4_INIT_ENABLE 1 + + u8 logging_policy; +#define NAT64_BIB_LOG_ENABLE 0 /* Default */ +#define NAT64_SESSION_LOG_ENABLE 1 + +#define NAT64_BIDIR_REFRESH 1 /* 1 - timer refresh in both direction */ +#define NAT64_UNIDIR_REFRESH 0 /* 0 - default (only v6 side refresh timer)*/ + + u8 nat64_refresh_both_direction; /* 0 - default (only v6 side refresh timer) */ +#define NAT64_BIDIR_REFRESH 1 /* 1 - timer refresh in both direction */ + + u8 udp_zero_checksum; /* 0 - default (calc checksum) */ +#define NAT64_UDP_ZERO_CHECKSUM_DROP 1 /* 1 -drop */ + + u16 port_limit; + + cnat_portmap_v2_t *port_map; + + u32 logging_index; + + nat64_timeout_info_t timeout_info; + /* + * These fields are not used much, let us keep it in the end + */ + u32 v4_vrf_id; /* V4 vrf id */ + u32 v6_vrf_id; /* V6 vrf id */ + + 
u32 v4_if_num; /* V4 SVI ifnum */ + u32 v6_if_num; /* V6 SVI ifnum */ + + u16 dyn_start_port; + + u16 pcp_server_port; + u32 pcp_server_addr[4]; + u32 rseed_ip; +#define NAT64_FRAG_ENABLE 1 +#define NAT64_FRAG_DISABLE 0 + u8 frag_state; + u8 nat64_enable; /* Enable/Disable this instance. */ + + u16 rtsp_port; + +} nat64_table_entry_t; + + + +extern nat64_table_entry_t nat64_table_array[NAT64_MAX_NAT64_ENTRIES]; +extern nat64_table_entry_t *nat64_table_ptr; +extern nat64_counters_t nat64_all_counters[NAT64_MAX_NAT64_ENTRIES]; +extern nat64_inst_gen_counter_t nat64_inst_gen_counters[NAT64_MAX_NAT64_ENTRIES]; + +typedef struct nat64_common_pipeline_data_ { +#ifdef TOBE_PORTED + spp_node_main_vector_t *nmv; +#endif + + u16 *nat64_id_ptr; + + nat64_table_entry_t *nat64_entry_ptr; + +} nat64_common_pipeline_data_t; + +typedef struct nat64_v6_to_v4_pipeline_data_ { + nat64_common_pipeline_data_t common_data; + + u32 bib_bucket; + u32 session_bucket; + + nat64_v6_key_t v6_in_key; + nat64_v6_key_t v6_dest_key; + + /* + * IPv6 Data, everthing in host order except for the addr fields + */ + u32 version_trafficclass_flowlabel; + + u16 payload_length; + u8 next_header; + u8 hop_limit; + + /* + * These Address fields are in Network Order, so that + * it is easy to extract the IPv4 address from them + */ + u32 ipv6_src[4]; + + u32 ipv6_dst[4]; + + u8 frag_next_header; + u8 frag_reserved; + u16 frag_offset_res_m; + u32 frag_identification; + + ipv4_header *ipv4_header; + union { + struct _v4_l4_info { + u8 *ipv4_l4_header; + u8 pad0; + u8 pad1; + u8 pad2; + u8 pad3; + } v4_l4_info; + struct _v4_icmp_info { + icmp_v4_t *ipv4_icmp_header; + u8 old_icmp_type; + u8 new_icmp_type; + u8 old_icmp_code; + u8 new_icmp_code; + u16 checksum; + u16 old_iden; // length (ICMP extn), ptr (param) + u16 new_iden; // ----- do ------------- + u16 old_seq; // MTU for PTB case + u16 new_seq; // ----- do ------------- + } v4_icmp_info; + struct _v4_udp_info { + udp_hdr_type_t *ipv4_udp_header; + u8 pad0; + u8 pad1; + u8 pad2; + u8 pad3; + } v4_udp_info; + struct _v4_tcp_info { + tcp_hdr_type *ipv4_tcp_header; + u16 old_src_port; + u16 new_src_port; + u16 dest_port; + nat64_tcp_events tcp_event; + } v4_tcp_info; + } l4_u; + + + l4_frag_info_t *frag_info; /* port for tcp/udp, ident - icmp */ + + + /* Counters will be added here */ + union { + nat64_v6_to_v4_tcp_counter_t *tcp_counter; + nat64_v6_to_v4_udp_counter_t *udp_counter; + nat64_v6_to_v4_icmp_counter_t *icmp_counter; + nat64_v6_to_v4_icmp_error_counter_t *icmp_error_counter; + nat64_frag_counter_t *frag_counter; + nat64_options_counter_t *options_counter; + } nat64_ctr_u; + nat64_icmp_gen_counter_t *icmp_gen_counter; +} nat64_v6_to_v4_pipeline_data_t; + + +typedef struct nat64_v4_to_v6_pipeline_data_ { + nat64_common_pipeline_data_t common_data; + + u32 bib_bucket; + u32 session_bucket; + + nat64_v4_key_t v4_src_key; /* Will be translated using Prefix */ + nat64_v4_key_t v4_dest_key; /* will be the out key for NAT64 */ + + /* + * IPv4 data + */ + u8 version_hdr_len_words; + u8 tos; + u16 total_len_bytes; + + u16 identification; + u16 frag_flags_offset; + + u8 ttl; + u8 protocol; + u16 l4_checksum; + + u32 ipv4_src_addr; + u32 ipv4_dst_addr; + + /* + * Pointers to IPv6 headers + */ + ipv6_header_t *ipv6_header; + ipv6_frag_header_t *ipv6_frag_header; + + union { + struct _v6_l4_info { + u8 *ipv6_l4_header; + u8 pad0; + u8 pad1; + u8 pad2; + u8 pad3; + } v6_l4_info; + struct _v6_icmp_info { + icmp_v6_t *ipv6_icmp_header; + u8 old_icmp_type; + u8 new_icmp_type; + u8 
old_icmp_code; + u8 new_icmp_code; + u16 old_iden; // length (ICMP extn), ptr (param) + u16 new_iden; // ----- do ------------- + u16 old_seq; // MTU for PTB case + u16 new_seq; // ----- do ------------- + } v6_icmp_info; + struct _v6_udp_info { + udp_hdr_type_t *ipv6_udp_header; + u8 pad0; + u8 pad1; + u8 pad2; + u8 pad3; + } v6_udp_info; + struct _v6_tcp_info { + tcp_hdr_type *ipv6_tcp_header; + u16 old_dest_port; + u16 new_dest_port; + u16 src_port; + nat64_tcp_events tcp_event; + } v6_tcp_info; + } l4_u; + + l4_frag_info_t *frag_info; /* port for tcp/udp, ident - icmp */ + + /* Need to add counters here */ + union { + nat64_v4_to_v6_tcp_counter_t *tcp_counter; + nat64_v4_to_v6_udp_counter_t *udp_counter; + nat64_v4_to_v6_icmp_counter_t *icmp_counter; + nat64_v4_to_v6_icmp_error_counter_t *icmp_error_counter; + nat64_frag_counter_t *frag_counter; + nat64_options_counter_t *options_counter; + } nat64_ctr_u; + nat64_icmp_gen_counter_t *icmp_gen_counter; + +} nat64_v4_to_v6_pipeline_data_t; + +#endif diff --git a/vnet/vnet/vcgn/nat64_tcp_sm.h b/vnet/vnet/vcgn/nat64_tcp_sm.h new file mode 100644 index 00000000000..3a505bc1649 --- /dev/null +++ b/vnet/vnet/vcgn/nat64_tcp_sm.h @@ -0,0 +1,91 @@ +/* + *------------------------------------------------------------------ + * nat64_tcp_sm.h - Stateful NAT64 translation TCP State machine + * + * Copyright (c) 2011 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ +#ifndef __NAT64_TCP_SM_H__ +#define __NAT64_TCP_SM_H__ + + +/* TCP States */ +typedef enum { + TCP_CLOSED, + TCP_V4_INIT, + TCP_V6_INIT, + TCP_ESTABLISHED, + TCP_V4_FIN_RCV, + TCP_V6_FIN_RCV, + TCP_V4V6_FIN_RCV, + TCP_TRANS, + TCP_NONE +} nat64_tcp_states; + +/* TCP Events */ +typedef enum { + TCP_TIMEOUT_EV, + TCP_V6_SYN_EV, + TCP_V4_SYN_EV, + TCP_V6_FIN_EV, + TCP_V4_FIN_EV, + TCP_V6_RST_EV, + TCP_V4_RST_EV, + TCP_DEFAULT_EV, + TCP_EV_COUNT +} nat64_tcp_events; + +/* TCP Actions */ +typedef enum { + TCP_FORWARD, + TCP_COND_FORWARD, /* Conditional forward, based on presence of + * session and bib entries */ + TCP_STORE, + TCP_PROBE, + TCP_CREATE_SESSION, + TCP_DELETE_SESSION, + TCP_DROP, + TCP_ACTION_NONE, + TCP_ACTION_COUNT +} nat64_tcp_actions; + +typedef struct { + nat64_tcp_states next_state; + nat64_tcp_actions action; +} nat64_tcp_trans_t; + +typedef struct { + nat64_tcp_trans_t event[TCP_EV_COUNT]; +} nat64_tcp_state_trans_t; + +extern nat64_tcp_state_trans_t nat64_tcp_sm_lookup[TCP_NONE]; + +/* +inline void +nat64_update_v6_to_v4_tcp (nat64_v6_to_v4_pipeline_data_t *pctx_ptr, + nat64_bib_entry_t *bib_ptr); + +inline u8 nat64_v6_to_v4_tcp_perform_action ( + spp_ctx_t *ctx, + nat64_v6_to_v4_pipeline_data_t *pctx_ptr, + nat64_bib_entry_t *bib_db, + nat64_session_entry_t *session_db); + +inline void +nat64_copy_tcp_into_pctx (nat64_v6_to_v4_pipeline_data_t *pctx_ptr); +*/ + + + +#endif diff --git a/vnet/vnet/vcgn/platform_common.h b/vnet/vnet/vcgn/platform_common.h new file mode 100644 index 00000000000..2805b6078ce --- /dev/null +++ b/vnet/vnet/vcgn/platform_common.h @@ -0,0 +1,136 @@ +/* + *--------------------------------------------------------------------------- + * platform_common.h -- file has all platform related macros defined as NULL; + * the included "platform_common_override.h" will have the actual + * platform specific defines + * + * Copyright (c) 2011-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +#ifndef __PLATFORM_COMMON_H__ +#define __PLATFORM_COMMON_H__ + +/* $$$ FIXME causes printf format warnings */ +#define PLATFORM_DEBUG_PRINT(...) 
/* printf(__VA_ARGS__) */ +#define PLATFORM_FILL_DST_ADDR_PORT_TABLE +#define PLATFORM_SET_CTX_RU_TX_FROM_NODE(ctx, value) +#define PLATFORM_SET_CTX_RU_TX_DST_IPPORT_IDX(ctx, value) +#define PLATFORM_SET_CTX_RU_TX_PKT_TYPE(ctx, type) +#define PLATFORM_SET_RX_VRF(ctx, rx_vrf, hardcode, mask) +#define PLATFORM_SET_TX_VRF(ctx, tx_vrf) +/* PLATFORM_CNAT_SET_RX_VRF definition is not same as PLATFORM_SET_RX_VRF, + * So, maintaining two different definitions + */ +#define PLATFORM_CNAT_SET_RX_VRF(ctx, rx_vrf, proto) +#define PLATFORM_CNAT_SET_TX_VRF(ctx, tx_vrf) + +#define PLATFORM_PRINT_TESTING_PG() +#define PLATFORM_INIT_TEMP_SENSORS() +#define PLATFORM_READ_CPU_SENSORS(value) +#define PLATFORM_SET_TEMP_READ_BLOCK(var, val) +#define PLATFORM_NFV9_DISP_NODE_IDX + + +/* Assumption is, syslog packets + * are sent out via same channel as that of NFV9. + * Has to be overridden if this assumption is false. + */ +#define PLATFORM_SYSLOG_DISP_NODE_IDX PLATFORM_NFV9_DISP_NODE_IDX + +#define PLATFORM_CNAT_DB_DUMP_POLICY_PRINT() +#define PLATFORM_PRINT_CTX_VALUES(ctx) +#define PLATFORM_ADD_VRF_MAP_HANDLE_PARTITION(uidb_index, partition_id) +#define PLATFORM_DEL_VRF_MAP_HANDLE_PARTITION(uidb_index, partition_id) +#define PLATFORM_ALLOC_NFV9_PKT_BUFFER(ctx, to_lc_cpu) +#define PLATFORM_SET_DSLITE_ENABLE_FLAG(uidb_index, dslite_id) +#define PLATFORM_CHECK_DSLITE_ENABLE_FLAG + +#define PLATFORM_CNAT_INSTS 1 +#define PLATFORM_HANDLE_TTL_DECREMENT 0 // Don't handle TTL in NAT44 Application (default). + +// For ISM, we need to copy the ipv6->hop_limit to ipv4 ttl. +#define PLATFORM_6RD_COPY_TTL_IPV6_TO_IPV4 0 + +//For ISM case, need to allow as the TTL decrement happens at ingress LC +#define PLATFORM_6RD_ALLOW_TTL_1 0 + +#define PLATFORM_HANDLE_ICMP_TTL_EXPIRED 0 // Don't handle ICMP_ERROR msg for TTL <=1 in NAT44 App (default). + +#define PLATFORM_IPV4_FRAG_FRAG_HOLD_LIMIT 1 +#define PLATFORM_MAX_IPV4_CTX_ENTRIES 1 +#define PLATFORM_MAPE_FRAG 0 + +#define PLATFORM_ADDR_MASK_PER_CORE 0 +#define PLATFORM_ADDR_MASK_PER_CORE_PER_PARTITION 0 +#define PLATFORM_MAX_CORES 1 +#define PLATFORM_MAX_CORES_PER_PARTITION 1 +#define PLATFORM_MAX_NAT_ENTRIES 1 +#define PLATFORM_MAX_USER_ENTRIES 1 +#define PLATFORM_CNAT_MAX_ADDR_POOL_SIZE 0x1 +#define PLATFORM_DBL_SUPPORT 0 /* Default no DBL support, no NAT44 session table */ + +#define PLATFORM_MAX_DB_ENTRY_PER_SCAN 1 +#define PLATFORM_MAX_DB_ENTRY_SELECTED_PER_SCAN 1 +#define MAX_COMBINED_DB_ENTRIES_PER_SCAN 0 + +#define PLATFORM_CNAT_TIMEOUT_IPPROT_MASK 0 +#define PLATFORM_CNAT_TIMEOUT_PORTPROT_MASK 0 + +#define PLATFORM_MAX_SHOW_BUFFER_SIZE 1700 +#define PLATFORM_MAX_TRANSLATION_ENTRIES (50) +#define PLATFORM_MAX_UTIL_ENTRIES (100) +#define PLATFORM_MAX_NAT44_UTIL_ENTRIES ((64)/PLATFORM_MAX_CORES) + +#define PLATFORM_CNAT_NFV9_SHIM_HDR_OFFSET 0 +#define PLATFORM_CNAT_NFV9_L2_ENCAPS_OFFSET 0 + + +/* Below are nat64 statful related define */ +#define PLATFORM_NAT64_SET_RX_VRF(rx_vrf, proto, inst_id) \ + rx_vrf = proto | (inst_id & CNAT_VRF_MASK); + +#define PLATFORM_NAT64_MAX_TRANSLATION_ENTRIES (30) +#define PLATFORM_DS_LITE_MAX_TRANSLATION_ENTRIES (30) + +#define PLATFORM_SET_NAT64_ENABLE_FLAG(uidb_index, nat64_id) \ + { \ + nat64_set_enable_flag(nat64_id, ENABLE); \ + } + +#define PLATFORM_CHECK_NAT64_ENABLE_FLAG 1 +#define PLATFORM_SET_MAPE_ENABLE_FLAG(uidb_index, mape_id) +#define PLATFORM_CHECK_MAPE_ENABLE_FLAG 1 + +/* very small number , PD has correct value. 
+ this is because, if the platform does not support NAT64, it should not take too much. */ +#define PLATFORM_NAT64_MAX_SESSIONS 10 +#define PLATFORM_NAT64_TIMEOUT_HASH_SIZE 10 +#define PLATFORM_MAP_ADDR_PER_CORE 1024 + +#define ENABLE 1 +#define DISABLE 0 + +/* Platform Xlat inline learn function */ +#define PLATFORM_INLINE_LEARN_FUNC(a,b,c) + + +/* Checksum calculation to be done in software */ +#define PLATFORM_XLAT_SW_CHECKSUM_CALC 0 + + +/* The include below overrides all the above null defs with the actual platform + specific defines */ +#include "platform_common_override.h" + +#endif /* __PLATFORM_COMMON_H__ */ diff --git a/vnet/vnet/vcgn/platform_common_override.h b/vnet/vnet/vcgn/platform_common_override.h new file mode 100644 index 00000000000..d6d3b0785b5 --- /dev/null +++ b/vnet/vnet/vcgn/platform_common_override.h @@ -0,0 +1,304 @@ +/* + *--------------------------------------------------------------------------- + * platform_common_override.h -- File has the actual platform specific defines. + * Only included by platform_common.h + * + * Copyright (c) 2011-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ + +#ifndef __PLATFORM_COMMON_OVERRIDE_H__ +#define __PLATFORM_COMMON_OVERRIDE_H__ + +extern unsigned char my_octeon_id; + +#undef PLATFORM_DBL_SUPPORT +#define PLATFORM_DBL_SUPPORT 1 // Destination Based logging support + // NAT44 session table required. + +#undef PLATFORM_ADDR_MASK_PER_CORE +/* commenting this. 
Currently we are considering only single core */ +//#define PLATFORM_ADDR_MASK_PER_CORE 0x3f // Using 64 cores +#define PLATFORM_ADDR_MASK_PER_CORE 0x01 + +#undef MAX_COMBINED_DB_ENTRIES_PER_SCAN +#define MAX_COMBINED_DB_ENTRIES_PER_SCAN 128 + +#undef PLATFORM_MAX_CORES +#define PLATFORM_MAX_CORES (PLATFORM_ADDR_MASK_PER_CORE + 1) + +// Roddick does not have any partition of cores +#undef PLATFORM_ADDR_MASK_PER_CORE_PER_PARTITION +#define PLATFORM_ADDR_MASK_PER_CORE_PER_PARTITION \ + PLATFORM_ADDR_MASK_PER_CORE + +#undef PLATFORM_MAX_CORES_PER_PARTITION +#define PLATFORM_MAX_CORES_PER_PARTITION PLATFORM_MAX_CORES + +#undef PLATFORM_CNAT_INSTS +//#define PLATFORM_CNAT_INSTS 64 +#define PLATFORM_CNAT_INSTS 1 /* currently its only single instance */ + +#undef PLATFORM_MAX_NAT_ENTRIES +//#define PLATFORM_MAX_NAT_ENTRIES 20000000 // 20M +#define PLATFORM_MAX_NAT_ENTRIES 1666660 // ~80M/48 (79999680/48) + +#undef PLATFORM_MAX_USER_ENTRIES +#define PLATFORM_MAX_USER_ENTRIES 20800 // ~1M/48 (998400/48) + + +/* 524288: + (20000000 translations) / (64 CNAT INSTANCES) = 312500 + nearest higher number which is power of 2 next to 312500 +*/ +#undef PLATFORM_CNAT_MAIN_PRELIM_HASH_SIZE +//#define PLATFORM_CNAT_MAIN_PRELIM_HASH_SIZE 524288 +#define PLATFORM_CNAT_MAIN_PRELIM_HASH_SIZE (5<<20) +/* 4096: + (200000 users) / (64 CNAT INSTANCES) = 3125 + nearest higher number which is power of 2 next to 3125 +*/ +#undef PLATFORM_CNAT_USER_PRELIM_HASH_SIZE +#define PLATFORM_CNAT_USER_PRELIM_HASH_SIZE 4096 + +#undef PLATFORM_CNAT_MAX_ADDR_POOL_SIZE +#define PLATFORM_CNAT_MAX_ADDR_POOL_SIZE 0x10000 // max /16 + +#undef PLATFORM_MAX_DB_ENTRY_PER_SCAN +#define PLATFORM_MAX_DB_ENTRY_PER_SCAN 400 + +#undef PLATFORM_MAX_DB_ENTRY_SELECTED_PER_SCAN +#define PLATFORM_MAX_DB_ENTRY_SELECTED_PER_SCAN 100 // 1/4th of above + +#undef PLATFORM_CNAT_TIMEOUT_IPPROT_MASK +#define PLATFORM_CNAT_TIMEOUT_IPPROT_MASK 0xFFFFFFFF0000FFFF + +#undef PLATFORM_CNAT_TIMEOUT_PORTPROT_MASK +#define PLATFORM_CNAT_TIMEOUT_PORTPROT_MASK 0x00000000FFFFFFFF + +#ifdef TARGET_RODDICK /* EVB doesnt need it */ +#undef PLATFORM_FILL_DST_ADDR_PORT_TABLE +#define PLATFORM_FILL_DST_ADDR_PORT_TABLE fill_dst_addr_port_table(); +#endif + + +#ifndef RODDICK_ON_LINUX_OR_EVB +#undef PLATFORM_SET_CTX_RU_TX_FROM_NODE +#undef PLATFORM_SET_CTX_RU_TX_DST_IPPORT_IDX +#undef PLATFORM_SET_CTX_RU_TX_PKT_TYPE + +#define PLATFORM_SET_CTX_RU_TX_FROM_NODE(ctx, value) \ + (vnet_buffer(ctx))->vcgn_uii.ru.tx.from_node = value; +#define PLATFORM_SET_CTX_RU_TX_DST_IPPORT_IDX(ctx, value) \ + (vnet_buffer(ctx))->vcgn_uii.ru.tx.dst_ip_port_idx = value; +#define PLATFORM_SET_CTX_RU_TX_PKT_TYPE(ctx, type) \ + (vnet_buffer(ctx))->vcgn_uii.ru.tx.packet_type = type; +#endif + +#undef PLATFORM_SET_RX_VRF +#undef PLATFORM_SET_TX_VRF +#ifdef TARGET_RODDICK +#define PLATFORM_SET_RX_VRF(ctx, rx_vrf, hardcode, mask) \ + rx_vrf = (ctx->ru.rx.uidb_index & CNAT_VRF_MASK); +#define PLATFORM_SET_TX_VRF(ctx, tx_vrf) \ + ctx->ru.tx.uidb_index = tx_vrf; +#else /*EVB */ +#define PLATFORM_SET_RX_VRF(ctx, rx_vrf, hardcode, mask) \ + rx_vrf = hardcode; +#define PLATFORM_SET_TX_VRF(ctx, tx_vrf) +#endif + +#undef PLATFORM_CNAT_SET_RX_VRF +#undef PLATFORM_CNAT_SET_TX_VRF + +#define PLATFORM_CNAT_SET_RX_VRF(if_index, rx_vrf, proto) \ + rx_vrf = proto | ((if_index) & CNAT_VRF_MASK); + +#define PLATFORM_CNAT_SET_TX_VRF(if_index, tx_vrf) \ + (if_index) = ((tx_vrf) & CNAT_VRF_MASK); + + + +#undef PLATFORM_NAT64_SET_RX_VRF + +#ifdef TARGET_RODDICK + +#define PLATFORM_NAT64_SET_RX_VRF(rx_vrf, proto, inst_id) \ + 
rx_vrf = proto | (inst_id & CNAT_VRF_MASK); + +#else /* EVB */ + +#define PLATFORM_NAT64_SET_RX_VRF(rx_vrf, proto, inst_id) \ + rx_vrf = proto | inst_id; + +#endif + +#ifdef TARGET_EVB +#define VRF_MAP_CONFIG +#endif + +#undef PLATFORM_PRINT_TESTING_PG +#if defined(TARGET_LINUX_UDVR) || defined(CNAT_PG) +#define PLATFORM_PRINT_TESTING_PG() printf("testing pg\n"); +#else +#define PLATFORM_PRINT_TESTING_PG() +#endif + +#ifdef TARGET_RODDICK +#undef PLATFORM_INIT_TEMP_SENSORS +#undef PLATFORM_READ_CPU_SENSORS +#undef PLATFORM_SET_TEMP_READ_BLOCK + +#define PLATFORM_INIT_TEMP_SENSORS() Init_temperature_sensors(); +#define PLATFORM_READ_CPU_SENSORS(value) read_octeon_sensors(value); +#define PLATFORM_SET_TEMP_READ_BLOCK(var, val) var = &val->param[0]; +#endif + +#undef PLATFORM_HANDLE_ICMP_TTL_EXPIRED +#define PLATFORM_HANDLE_ICMP_TTL_EXPIRED 1 // handle TTL in NAT44 Application (for AVSM) + +#undef PLATFORM_NFV9_DISP_NODE_IDX +#ifdef TARGET_RODDICK +#define PLATFORM_NFV9_DISP_NODE_IDX "roddick_infra_l3_tx" +#else /* EVB */ +#define PLATFORM_NFV9_DISP_NODE_IDX "cnat_rewrite_output" +#endif + +#undef PLATFORM_CNAT_DB_DUMP_POLICY_PRINT +#define PLATFORM_CNAT_DB_DUMP_POLICY_PRINT() \ + printf("my instance:%d\n" \ + "icmp timeout %d udp init timeout %d act timeout %d\n" \ + "tcp init timeout %d act timeout %d mapping refresh %d\n" \ + "port limit per user %d ftp alg %d lb debug %d\n" \ + "icmp rate limit 0x%x config delete timer 0x%x\n" \ + "global debug flag 0x%x\n" \ + "icmp rate limit (pkts/per sec) %d\n" \ + "dynamic port range start %d\n" \ + "debug ivrf 0x%x flag 0x%x start_addr 0x%x end_addr 0x%x\n" \ + "debug ovrf 0x%x flag 0x%x start_addr 0x%x end_addr 0x%x\n", \ + my_instance_number, \ + icmp_session_timeout, udp_init_session_timeout, udp_act_session_timeout, \ + tcp_initial_setup_timeout, tcp_active_timeout, \ + mapping_refresh_both_direction, cnat_main_db_max_ports_per_user, \ + ftp_alg_enabled, lb_debug_enable, per_user_icmp_msg_limit, \ + config_delete_timeout, \ + global_debug_flag, \ + cnat_main_db_icmp_rate_limit, \ + cnat_static_port_range, \ + debug_i_vrf, debug_i_flag, debug_i_addr_start, debug_i_addr_end, \ + debug_o_vrf, debug_o_flag, debug_o_addr_start, debug_o_addr_end); + + +#undef PLATFORM_PRINT_CTX_VALUES +#ifdef TARGET_RODDICK +#define PLATFORM_PRINT_CTX_VALUES(ctx) \ + printf("\nAFTER: %s cur_hdr %p, uidb %d, pkt_type %d, cur_len %d\n", \ + type_str, \ + ctx->current_header, \ + ctx->ru.tx.uidb_index, \ + ctx->ru.tx.packet_type, \ + ctx->current_length); +#else /* EVB */ +#define PLATFORM_PRINT_CTX_VALUES(ctx) \ + printf("\nAFTER: %s cur_hdr %p, cur_len %d\n", \ + type_str,\ + ctx->current_header, \ + ctx->current_length); +#endif + +#undef PLATFORM_ADD_VRF_MAP_HANDLE_PARTITION +#define PLATFORM_ADD_VRF_MAP_HANDLE_PARTITION(uidb_index, partition_id) + +#undef PLATFORM_DEL_VRF_MAP_HANDLE_PARTITION +#define PLATFORM_DEL_VRF_MAP_HANDLE_PARTITION(uidb_index, partition_id) + +#undef PLATFORM_ALLOC_NFV9_PKT_BUFFER +#define PLATFORM_ALLOC_NFV9_PKT_BUFFER(ctx, to_lc_cpu) + +#undef PLATFORM_CNAT_NFV9_SHIM_HDR_OFFSET +#ifdef TARGET_RODDICK +// This corresponds to the length of the IMETRO SHIM Header for RODDICK +#define PLATFORM_CNAT_NFV9_SHIM_HDR_OFFSET 8 +#else +#define PLATFORM_CNAT_NFV9_SHIM_HDR_OFFSET 0 +#endif + +#undef PLATFORM_CNAT_NFV9_L2_ENCAPS_OFFSET +#ifdef TARGET_RODDICK +#define PLATFORM_CNAT_NFV9_L2_ENCAPS_OFFSET 0 +#else +#define PLATFORM_CNAT_NFV9_L2_ENCAPS_OFFSET 16 +#endif + +#undef PLATFORM_MAX_SHOW_BUFFER_SIZE +#undef PLATFORM_MAX_TRANSLATION_ENTRIES +#undef 
PLATFORM_MAX_UTIL_ENTRIES + +#define PLATFORM_MAX_SHOW_BUFFER_SIZE 1700 +#define PLATFORM_MAX_TRANSLATION_ENTRIES (50) +#define PLATFORM_NAT64_MAX_TRANSLATION_ENTRIES (30) +#define PLATFORM_MAX_UTIL_ENTRIES (100) + + +#undef PLATFORM_NAT64_MAX_SESSIONS +#undef PLATFORM_NAT64_TIMEOUT_HASH_SIZE +#define PLATFORM_NAT64_MAX_SESSIONS 20000000 +#define PLATFORM_NAT64_TIMEOUT_HASH_SIZE 24001 /* Ref: CSCtr36242 */ + +#undef PLATFORM_CHECK_DSLITE_ENABLE_FLAG +#define PLATFORM_CHECK_DSLITE_ENABLE_FLAG 1 + +/* Fragment hold limit is Platform specific */ +/* For Roddick, it is 63 due to hardware limitation */ +#undef PLATFORM_IPV4_FRAG_FRAG_HOLD_LIMIT +#define PLATFORM_IPV4_FRAG_FRAG_HOLD_LIMIT 63 + +#undef PLATFORM_MAX_IPV4_CTX_ENTRIES +#define PLATFORM_MAX_IPV4_CTX_ENTRIES 80 + +#undef PLATFORM_DIRN_IN_TO_OUT +#undef PLATFORM_DIRN_OUT_TO_IN +#undef PLATFORM_SET_SVI_PARAMS_FIELD + +#define PLATFORM_DIRN_IN_TO_OUT +#define PLATFORM_DIRN_OUT_TO_IN +#define PLATFORM_SET_SVI_PARAMS_FIELD(var, value) + +#undef PLATFORM_GET_NFV9_L3_HDR_OFFSET +#define PLATFORM_GET_NFV9_L3_HDR_OFFSET \ + ((u8 *)ctx + ctx->data + CNAT_NFV9_IP_HDR_OFFSET); + +#undef PLATFORM_GET_NFV9_L4_HDR_OFFSET +#define PLATFORM_GET_NFV9_L4_HDR_OFFSET \ + ((u8 *) ctx + ctx->data + CNAT_NFV9_UDP_HDR_OFFSET); + +#undef PLATFORM_MEMSET_CNAT_LOG_PKT_DATA +#define PLATFORM_MEMSET_CNAT_LOG_PKT_DATA + +/* + Index 0 -- SE_P2MP + Index 1 -- HA Destination 1 + Index 2 -- HA Destination 2 + Index 3 -- EXT_LOG_SRVR +*/ +enum { + NODE_CGNCFG, + NODE_HA, + NODE_PD_CONFIG, + NODE_LOGGING, + NODE_TRACE_BACKUP, + NODE_MAX, +}; + +#endif /* __PLATFORM_COMMON_OVERRIDE_H__ */ diff --git a/vnet/vnet/vcgn/spp_ctx.h b/vnet/vnet/vcgn/spp_ctx.h new file mode 100644 index 00000000000..2d3c95c8887 --- /dev/null +++ b/vnet/vnet/vcgn/spp_ctx.h @@ -0,0 +1,76 @@ +/* + *------------------------------------------------------------------ + * spp_ctx.h - packet / context definitions + * + * Copyright (c) 2007-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __SPP_CTX_H__ +#define __SPP_CTX_H__ + +/* Packet header / data */ + +/* Any change to spp_ctx_t structure should be updated in vnet/buffer.h + * as well. 
+ */ +typedef struct _spp_ctx { +#ifdef TOBE_PORTED + /* Following fields are required to handle multibuffer */ + u32 num_buffers; /* Number of buffers part of packet */ + vlib_buffer_t *next_ctx_this_packet; + + /* Following is used by non-UDP protocols */ +#define SPP_CTX_FEATURE_DATA_SIZE 16 + + u8 feature_data[SPP_CTX_FEATURE_DATA_SIZE]; +#endif + + union { /* Roddick specific */ + u32 roddick_info; + struct __tx_pkt_info { /* Used by PI to PI communication for TX */ + u32 uidb_index:16; /* uidb_index to transmit */ + u32 packet_type:2; /* 1-IPv4, 2-Ipv6, - 0,3 - Unused */ + u32 ipv4_defrag:1; /* 0 - Normal, 1 - update first + * segment size + * (set by 6rd defrag node) + */ + + u32 dst_ip_port_idx:4;/* Index to dst_ip_port_table */ + u32 from_node:4; + u32 calc_chksum:1; + u32 reserved:4; + } tx; + struct __rx_pkt_info { /* Used by PD / PI communication */ + u32 uidb_index:16; /* uidb_index received in packet */ + u32 packet_type:2; /* 1-IPv4, 2-Ipv6, - 0,3 - Unused */ + u32 icmp_type:1; /* 0-ICMP query type, 1-ICMP error type */ + u32 protocol_type:2; /* 1-TCP, 2-UDP, 3-ICMP, 0 - Unused */ + u32 ipv4_defrag:1; /* 0 - Normal, 1 - update first + * segment size + * (set by 6rd defrag node) + */ + + u32 direction:1; /* 0-Outside, 1-Inside */ + u32 frag:1; /*IP fragment-1, Otherwise-0*/ + u32 option:1; /* 0-No IP option (v4) present, non-fragHdr + * option hdr present (v6) + */ + u32 df_bit:1; /* IPv4 DF bit copied here */ + u32 reserved1:6; + } rx; + } ru; +} spp_ctx_t; + +#endif diff --git a/vnet/vnet/vcgn/spp_platform_trace_log.c b/vnet/vnet/vcgn/spp_platform_trace_log.c new file mode 100644 index 00000000000..2c119f06ba2 --- /dev/null +++ b/vnet/vnet/vcgn/spp_platform_trace_log.c @@ -0,0 +1,991 @@ +/* + *------------------------------------------------------------------ + * spp_platform_trace_log.c + * + * Copyright (c) 2008-2011, 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *--------------------------------------------------------------------------- + */ + +#include <vlib/vlib.h> +#include <stdio.h> +#include <vppinfra/vec.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/clib.h> +#include <vlib/main.h> + +#include "tcp_header_definitions.h" +#include "platform_common.h" +#include "spp_platform_trace_log.h" + +#define WORD_SIZE sizeof(u32) + +int temperature_read_blocked = 1; + +spp_cnat_logger_tbl_t spp_cnat_logger_table[] = +{ + { CNAT_ERROR_SUCCESS, + 3, + 0, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_NO_CONFIG_ERROR, + 3, + 180, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_NO_VRF_RUN_ERROR, + 3, + 180, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_NO_POOL_FOR_ANY_ERROR, + 3, + 180, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_NO_PORT_FOR_ANY_ERROR, + 3, + 60, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_BAD_INUSE_ANY_ERROR, + 3, + 60, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_NOT_FOUND_ANY_ERROR, + 3, + 60, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_INV_PORT_FOR_DIRECT_ERROR, + 3, + 60, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_BAD_INUSE_DIRECT_ERROR, + 3, + 1, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_NOT_FOUND_DIRECT_ERROR, + 3, + 1, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_OUT_OF_PORT_LIMIT_ERROR, + 3, + 60, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_MAIN_DB_CREATE_ERROR, + 0, + 30, + {""} + }, + { CNAT_LOOKUP_ERROR, + 1, + 30, + {"Type"} + }, + { CNAT_INDEX_MISMATCH_ERROR, + 2, + 30, + {"in2out_index", + "out2in_index"} + }, + { CNAT_PACKET_DROP_ERROR, + 3, + 15, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_INV_UNUSED_USR_INDEX, + 1, + 10, + {"invalid/unused user index"} + }, + { CNAT_INVALID_VRFMAP_INDEX, + 0, + 60, + {""} + }, + { CNAT_USER_OUT_OF_PORTS, + 2, + 1800, + {"i-vrf", + "ipv4 addr"} + }, + { CNAT_EXT_PORT_THRESH_EXCEEDED, + 2, + 180, + {"i-vrf", + "ipv4 address"} + }, + { CNAT_EXT_PORT_THRESH_NORMAL, + 2, + 180, + {"vrf", + "ipv4 address"} + }, + { CNAT_NO_EXT_PORT_AVAILABLE, + 0, + 1, + {"",} + }, + { CNAT_SESSION_THRESH_EXCEEDED, + 2, + 1800, + {"vrf", + "ipv4 address"} + }, + { CNAT_SESSION_THRESH_NORMAL, + 2, + 30, /* changed to 30 */ + {"vrf", + "ipv4 address"} + }, + { WQE_ALLOCATION_ERROR, + 0, + 180, /* changed to 180 */ + {""} + }, + { ERROR_PKT_DROPPED, + 2, + 60, /* changed to 60 */ + {"spi-port", + "error-code"} + }, + { SYSMGR_PD_KEY_CREATION_ERROR, + 0, + 30, + {""} + }, + { SYSMGR_PD_SHMEM_ID_ERROR, + 0, + 1, + {""} + }, + { SYSMGR_PD_SHMEM_ATTACH_ERROR, + 0, + 1, + {""} + }, + { OCTEON_CKHUM_SKIPPED, + 2, + 60, /* changed to 60 */ + {"version", + "protocol"} + }, + { PK0_SEND_STATUS, + 1, + 15, + {"status"} + }, + { CMD_BUF_ALLOC_ERR, + 0, + 60, + {""} + }, + { SPP_CTX_ALLOC_FAILED, + 1, + 300, /* every 5 min */ + {"node"} + }, + { SPP_MAX_DISPATCH_REACHED, + 1, + 60, + {"node"} + }, + { HA_SIGCHILD_RECV, + 3, + 1, + {"pid", + "uid", + "signal",} + }, + { SIGACTION_ERR, + 0, + 1, + {""} + }, + { HA_INVALID_SEQ_OR_CONFIG_OR_TYPE, + 2, + 10, + {"seq-id or config option", + "Type"} + }, + { NODE_CREATION_ERROR, + 1, + 1, + {"node"} + }, + + { CNAT_CLI_INVALID_INPUT, + 4, + 0, + {"Error Type", + "Passed", + "Expected", + "Type"} + }, + { CNAT_DUMMY_HANDLER_HIT, + 1, + 0, + {"Handler"} + }, + { CNAT_CONFIG_ERROR, + 5, + 0, + {"Sub code", + "Param 1", + "Param 2", + "Param 3", + "Param 4"} + }, + { CNAT_NFV9_ERROR, + 1, + 180, /* changed to 180 */ + {"Sub code"} + }, + { 
CNAT_CMVX_TWSI_READ_WRITE_FAIL, + 3, + 180, + {"Operation", + "Location", + "Data"} + }, + { CNAT_TEMP_SENSOR_TIMEOUT, + 0, + 180, + {""} + }, + { CNAT_TEMP_SENSOR_DATA_MISMATCH, + 2, + 180, + {"Actual", + "Expected"} + }, + { CNAT_TEMP_SENSOR_CONFIG_FAILED, + 1, + 180, + {"Glik"} + }, + { HA_APP_NOT_RESPONDING, + 2, + 180, + {"CPU", + "Core"} + }, + { HA_DATA_PATH_TEST_FAILED, + 0, + 30, + {""} + }, + { CNAT_WRONG_PORT_ALLOC_TYPE, + 3, + 60, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_NEW_PORT_ALLOC_ERROR, + 3, + 60, + {"i-vrf", + "ipv4 addr", + "port"} + }, + { CNAT_INVALID_INDEX_TO_FREE_PORT, + 0, + 60, + {""} + }, + { CNAT_DELETE_DB_ENTRY_NO_PORTMAP, + 0, + 60, + {""} + }, + { CNAT_MAIN_DB_LIMIT_ERROR, + 0, + 180, + {""} + }, + { CNAT_USER_DB_LIMIT_ERROR, + 0, + 180, + {""} + }, + { CNAT_FRAG_DB_ERROR, + 1, + 180, + {"Type"} + }, + + { DROP_PKT_DUMP, + 0, + 20, + {""} + } +}; + +#define LOG_TABLE_MAX_ENTRIES \ + (sizeof(spp_cnat_logger_table)/sizeof(spp_cnat_logger_table[0])) + +u32 error_code_timestamps[LOG_TABLE_MAX_ENTRIES]; +spp_timer_t sensor_timer; +spp_trace_log_global_info_t spp_trace_log_global_info; +spp_global_counters_t spp_global_counters; + +/* + * Logging information structures + */ +spp_trace_log_info_t spp_default_trace_log_info; +spp_trace_log_info_t *spp_trace_log_info_pool; + +#ifdef TOBE_PORTED +/* + * The following 2 functions are temporary hacks until + * we have RTC support from the PD nodes + */ +inline +u32 spp_trace_log_get_sys_up_time_in_ms (void) +{ + spp_node_main_vector_t *nmv; + u32 sys_up_time; + + nmv = spp_get_node_main_vectorized_inline(); + + sys_up_time = (u32) (nmv->ticks / nmv->ticks_per_ms); + + return (sys_up_time); +} + +inline +u32 spp_trace_log_get_unix_time_in_seconds (void) +{ + spp_node_main_vector_t *nmv; + u32 unix_time; + + nmv = spp_get_node_main_vectorized_inline(); + + unix_time = (u32) (nmv->ticks / nmv->ticks_per_second); + + return (unix_time); +} + +/* + * edt: * * spp_trace_log_send_queued_pkt + * + * Tries to send a logging pkt that has been queued earlier + * because it could not be sent due to downstream constipation + * + * Argument: spp_trace_log_info_t *trace_logging_info + * structure that contains the packet context + */ +inline +void spp_trace_log_send_queued_pkt (spp_trace_log_info_t *trace_logging_info) +{ + spp_node_t *output_node; + + output_node = spp_get_nodes() + + spp_trace_log_global_info.spp_trace_log_disp_node_index; + + if (PREDICT_TRUE(output_node->sf.nused < SPP_MAXDISPATCH)) { + /* + * Move the logging context to output node + */ + spp_dispatch_make_node_runnable(output_node); + output_node->sf.ctxs[output_node->sf.nused++] = + trace_logging_info->queued_logging_context; + + /* + * Context has been queued, it will be freed after the pkt + * is sent. Clear this from the logging_context_info structure + */ + trace_logging_info->queued_logging_context = NULL; + + } else { + /* + * Can't do much, just return, may be we can send it later + */ + spp_global_counters.spp_trace_log_downstream_constipation_count++; + } +} + +/* + * edt: * * spp_trace_log_send_pkt + * + * Tries to send a logging pkt. If the packet cannot be sent + * because of rewrite_output node cannot process it, queue + * it temporarily and try to send it later. 
+ * + * Argument: spp_trace_log_info_t *trace_logging_info + * structure that contains the packet context + */ +inline +void spp_trace_log_send_pkt (spp_trace_log_info_t *trace_logging_info) +{ + spp_node_t *output_node; + + + output_node = spp_get_nodes() + + spp_trace_log_global_info.spp_trace_log_disp_node_index; + + if (PREDICT_TRUE(output_node->sf.nused < SPP_MAXDISPATCH)) { + /* + * Move the logging context to output node + */ + spp_dispatch_make_node_runnable(output_node); + output_node->sf.ctxs[output_node->sf.nused++] = + trace_logging_info->current_logging_context; + + } else { + /* + * Queue the context into the logging_info structure, + * We will try to send it later. Currently, we will + * restrict to only one context queued. + */ + spp_global_counters.spp_trace_log_downstream_constipation_count++; + + /* + * Attach the current logging context which is full to the + * queued context list in trace_logging_info structure + */ + trace_logging_info->queued_logging_context = + trace_logging_info->current_logging_context; + + /* + * Whether the context is queued or not, set the current context index + * to EMPTY, as the earlier context can no more be used to send + * more logging records. + */ + } + + trace_logging_info->current_logging_context = NULL; +} + +/* + * edt: * * spp_trace_log_send_pkt_always_success + * + * Tries to send a logging pkt. This cannot fail due to downstream + * constipation because we have already checked if the rewrite_output + * node can accept it. + * + * Argument: spp_trace_log_info_t *trace_logging_info + * structure that contains the packet context + * + * Argument: spp_node_t *output_node + * spp_node_t structure for rewrite_output node + */ +inline +void spp_trace_log_send_pkt_always_success ( + spp_trace_log_info_t *trace_logging_info, + spp_node_t *output_node) +{ + /* + * At this point we either have a current or queued logging context + */ + if (PREDICT_TRUE(trace_logging_info->current_logging_context != NULL)) { + + output_node->sf.ctxs[output_node->sf.nused++] = + trace_logging_info->current_logging_context; + + trace_logging_info->current_logging_context = NULL; + } else { + /* + * For queued logging context + */ + output_node->sf.ctxs[output_node->sf.nused++] = + trace_logging_info->queued_logging_context; + + trace_logging_info->queued_logging_context = NULL; + } + + /* + * Move the logging context to output node + */ + spp_dispatch_make_node_runnable(output_node); + +} + +/* + * edt: * * spp_create_trace_log_context + * + * Tries to create a logging context with packet buffer + * to send a new logging packet + * + * Argument: spp_trace_log_info_t *trace_logging_info + * structure that contains the nfv9 logging info and will store + * the packet context as well. + */ +inline +void spp_create_trace_log_context ( + spp_trace_log_info_t *trace_logging_info) +{ + spp_ctx_t *ctx; + + /* + * If queued_logging_context_index is non-EMPTY, we already have a logging + * packet queued to be sent. First try sending this before allocating + * a new context. 
We can have only one active packet context per + * trace_logging_info structure + */ + if (PREDICT_FALSE(trace_logging_info->queued_logging_context != NULL)) { + spp_trace_log_send_queued_pkt(trace_logging_info); + /* + * If we cannot still send the queued pkt, just return + * Downstream Constipation count would have increased anyway + */ + if (trace_logging_info->queued_logging_context != NULL) { + spp_global_counters.spp_trace_log_context_creation_deferred_count++; + return; + } + } + + + /* + * No context can be allocated, return silently + * calling routine will handle updating the error counters + */ + if (spp_ctx_alloc(&ctx, 1) < 1) { + spp_global_counters.spp_trace_log_context_creation_fail_count++; + return; + } + + trace_logging_info->current_logging_context = ctx; + trace_logging_info->pkt_length = 0; + + trace_logging_info->current_logging_context_timestamp = + spp_trace_log_get_sys_up_time_in_ms(); + + ctx->flags = SPP_CTX_END_OF_PACKET; + ctx->ru.tx.from_node = NODE_TRACE_BACKUP; + ctx->ru.tx.dst_ip_port_idx = EXT_TRACE_BACKUP_INDEX; + ctx->next_ctx_this_packet = (spp_ctx_t*) SPP_CTX_NO_NEXT_CTX; + ctx->current_header = &ctx->packet_data[SPP_TRACE_LOG_HDR_OFFSET]; + ctx->current_length = 0; + + trace_logging_info->log_record = 0; + trace_logging_info->total_record_count = 0; + trace_logging_info->next_data_ptr = + (u8 *) &ctx->packet_data[SPP_TRACE_LOG_HDR_OFFSET]; + +} + +/* + * edt: * * spp_trace_log_add_record_create + * + * Tries to create an add record to the NFV9 packet + * + * Argument: spp_trace_log_info_t *trace_logging_info + * structure that contains the nfv9 logging info and will store + * the packet context as well. + */ +inline +void spp_trace_log_add_record_create (spp_trace_log_info_t *trace_logging_info) +{ + + trace_logging_info->log_header = + (spp_trace_log_hdr_t *) (trace_logging_info->next_data_ptr); + + /* + * Initialize the number of traces recorded + */ + trace_logging_info->log_header->num_traces = + spp_host_to_net_byte_order_32(0); + + + trace_logging_info->log_record = + (spp_trace_log_t *) (trace_logging_info->log_header + 1); + + /* + * Update the length of the total pkt + */ + trace_logging_info->pkt_length += + SPP_LOG_TRACE_HEADER_LENGTH; + + /* + * Set the data pointer beyond the trace header field + */ + trace_logging_info->next_data_ptr = + (u8 *) (trace_logging_info->log_header + 1); + +} + +/* + * edt: * * spp_trace_logger + * + * Tries to log spp/cnat event/errors + * + * Argument: u8 *error_code + * Error code passed + * + * Argument: optional arguments + */ +void spp_trace_logger (u16 error_code, u16 num_args, u32 *arg) +{ + spp_trace_log_info_t *trace_logging_info = 0; + u8 i; + + trace_logging_info = + spp_trace_log_info_pool + + spp_trace_log_global_info.spp_log_pool_index[SPP_LOG_LTRACE]; + + if (PREDICT_FALSE(trace_logging_info->current_logging_context == NULL)) { + spp_create_trace_log_context(trace_logging_info); + + /* + * If still empty, return after increasing the count + */ + if (PREDICT_FALSE(trace_logging_info->current_logging_context == NULL)) { + return; + } + } + + if (PREDICT_FALSE(trace_logging_info->log_record == NULL)) { + spp_trace_log_add_record_create(trace_logging_info); + } + + /* + * We should definitely have add_record now, no need to sanitize + */ + trace_logging_info->log_record->error_code = + spp_host_to_net_byte_order_16(error_code); + trace_logging_info->log_record->num_args = + spp_host_to_net_byte_order_16(num_args); + + for (i = 0; i < num_args; i++) { + trace_logging_info->log_record->arg[i] = + 
spp_host_to_net_byte_order_32(*(arg + i)); + } + + trace_logging_info->pkt_length += SPP_TRACE_LOG_RECORD_LENGTH + WORD_SIZE*num_args; + trace_logging_info->current_logging_context->current_length = + trace_logging_info->pkt_length; + trace_logging_info->total_record_count += 1; + + trace_logging_info->next_data_ptr = + (u8 *) (trace_logging_info->next_data_ptr + WORD_SIZE + WORD_SIZE*num_args); + + trace_logging_info->log_record = + (spp_trace_log_t *) (trace_logging_info->next_data_ptr); + + /* + * Initialize the number of traces recorded + */ + trace_logging_info->log_header->num_traces = + spp_host_to_net_byte_order_32(trace_logging_info->total_record_count); + + + + /* + * If we have exceeded the packet length, let us send the + * packet now. There is buffer of additional bytes beyond + * max_pkt_length to ensure that the last add/delete record + * can be stored safely. + */ + if (trace_logging_info->pkt_length > + trace_logging_info->max_length_minus_max_record_size) { + spp_trace_log_send_pkt(trace_logging_info); + } +} + + +/* + * edt: * * spp_trace_log_timer_handler + * + * Timer handler for sending any pending NFV9 record + * + * Argument: spp_timer_t * timer_p + * Timer handler structure + */ +inline +void spp_trace_log_timer_handler (spp_timer_t * timer_p) +{ + spp_node_t *output_node; + spp_trace_log_info_t *trace_logging_info = 0; + u32 current_timestamp = spp_trace_log_get_sys_up_time_in_ms(); + i16 sf_nused; + + output_node = spp_get_nodes() + + spp_trace_log_global_info.spp_trace_log_disp_node_index; + + sf_nused = output_node->sf.nused; + + pool_foreach (trace_logging_info, spp_trace_log_info_pool, ({ + /* + * Check if no more logging contexts can be queued + */ + if (PREDICT_FALSE(sf_nused >= SPP_MAXDISPATCH)) { + break; + } + + /* + * If there is a current logging context and timestamp + * indicates it is pending for long, send it out + * Also if there is a queued context send it out as well + */ + if (trace_logging_info->queued_logging_context || + (trace_logging_info->current_logging_context && + (current_timestamp - + trace_logging_info->current_logging_context_timestamp) + > 1000)) { + spp_trace_log_send_pkt_always_success(trace_logging_info, + output_node); + sf_nused++; + } + })); + + timer_p->expires = + spp_timer_in_n_ms_inline(1000); /* every 1 sec */ + spp_timer_start(timer_p); + +} +inline +void spp_sensor_timer_handler (spp_timer_t * timer_p) +{ +#ifdef TARGET_RODDICK + if (!temperature_read_blocked) { + Init_temperature_sensors(); + read_octeon_sensors(TEMPERATURE_SENSOR_QUIET_MODE); + } + + timer_p->expires = + spp_timer_in_n_ms_inline(60000); /* every 1 sec */ + spp_timer_start(timer_p); + +#endif +} +void init_trace_log_buf_pool (void) +{ + spp_trace_log_info_t *my_spp_log_info; + u8 found; + spp_log_type_t log_type; + + /* + * Init SPP logging info as needed, this will be done only once + */ + spp_trace_log_init(); + + found = 0; + + for (log_type = SPP_LOG_LTRACE; log_type < SPP_LOG_MAX; log_type++ ) { + /* Do we already have a map for this log type? */ + pool_foreach (my_spp_log_info, spp_trace_log_info_pool, ({ + if (my_spp_log_info->log_type == log_type) { + found = 1; + break; + } + })); + + /* + * Entry not present + */ + if (!found) { + pool_get(spp_trace_log_info_pool, my_spp_log_info); + memset(my_spp_log_info, 0, sizeof(*my_spp_log_info)); + + /* + * Make the current and head logging context indeices as EMPTY. 
+ * When first logging happens, these get set correctly + */ + my_spp_log_info->current_logging_context = NULL; + my_spp_log_info->queued_logging_context = NULL; + + my_spp_log_info->log_type = log_type; + my_spp_log_info->max_length_minus_max_record_size = + SPP_TRACE_LOG_MAX_PKT_LENGTH; + + spp_trace_log_global_info.spp_log_pool_index[log_type] = + my_spp_log_info - spp_trace_log_info_pool; + } + + } + + return; +} + + +/* + * one time function + * has to be called at the init time + */ +void spp_trace_log_init (void) +{ + if (!spp_trace_log_global_info.spp_trace_log_init_done) { + +#ifdef TARGET_RODDICK + spp_trace_log_global_info.spp_trace_log_disp_node_index = + spp_lookup_node_index("roddick_infra_l3_tx"); +#elif defined(TARGET_BOOSTER) + spp_trace_log_global_info.spp_trace_log_disp_node_index = + spp_lookup_node_index("booster_infra_l3_tx"); +#endif + ASSERT(spp_trace_log_global_info.spp_trace_log_disp_node_index != (u16)~0); + + spp_trace_log_global_info.log_timer.cb_index = + spp_timer_register_callback(spp_trace_log_timer_handler); + spp_trace_log_global_info.log_timer.expires = + spp_timer_in_n_ms_inline(1000); /* every 1 sec */ + spp_timer_start(&spp_trace_log_global_info.log_timer); + + if (!my_core_id) { + sensor_timer.cb_index = + spp_timer_register_callback(spp_sensor_timer_handler); + sensor_timer.expires = + spp_timer_in_n_ms_inline(60000); /* every 1 sec */ + spp_timer_start(&sensor_timer); + } + + spp_trace_log_global_info.spp_trace_log_init_done = 1; + + /* + * Set MSC ip_addr, port values + */ +#ifdef TARGET_RODDICK + dst_ipv4_port_table[EXT_TRACE_BACKUP_INDEX].ipv4_address = + vpp_boot_params.msc_ip_address; + switch(vpp_boot_params.octeon_number) { + case 0: + dst_ipv4_port_table[EXT_TRACE_BACKUP_INDEX].port = 0x15BF; + break; + case 1: + dst_ipv4_port_table[EXT_TRACE_BACKUP_INDEX].port = 0x15BF; + break; + case 2: + dst_ipv4_port_table[EXT_TRACE_BACKUP_INDEX].port = 0x15BF; + break; + case 3: + dst_ipv4_port_table[EXT_TRACE_BACKUP_INDEX].port = 0x15BF; + break; + } +#else + dst_ipv4_port_table[EXT_TRACE_BACKUP_INDEX].ipv4_address = 0x01020304; + dst_ipv4_port_table[EXT_TRACE_BACKUP_INDEX].port = 0x15BF; +#endif + + } +} + +void spp_printf (u16 error_code, u16 num_args, u32 *arg) +{ + u32 current_timestamp; + spp_node_main_vector_t *nmv; + + if (PREDICT_FALSE(error_code >= LOG_TABLE_MAX_ENTRIES)) + { + /* printf("Error code invalid %d, %d, %d, %d\n", + error_code, LOG_TABLE_MAX_ENTRIES, + sizeof(spp_cnat_logger_table), sizeof(spp_cnat_logger_table[0])); + */ + return; /* Should not happen */ + } + + nmv = spp_get_node_main_vectorized_inline(); + current_timestamp = nmv->ticks / nmv->ticks_per_second; + + /* Check if any further hashing is required */ + + if (PREDICT_FALSE(error_code == DUMP_PKT_IDX)) { +#ifdef TARGET_RODDICK || defined(TARGET_BOOSTER) + spp_trace_logger(error_code, num_args, arg); +#else + u8 j ; + + printf("PKT DUMP :: "); + for (j = 0 ; j < num_args; j++) { + printf("0x%x ", arg[j]); + if (j == (num_args - 1)) { + printf("\n"); + } + } +#endif + } else if (PREDICT_TRUE((current_timestamp - error_code_timestamps[error_code]) >= + spp_cnat_logger_table[error_code].rate_limit_time)) { + /* update timestamp */ + error_code_timestamps[error_code] = current_timestamp; + +#ifdef TARGET_RODDICK || defined(TARGET_BOOSTER) + spp_trace_logger(error_code, num_args, arg); +#else + u8 j ; + + for (j = 0 ; j < num_args; j++) { + printf("%s: %d ", spp_cnat_logger_table[error_code].param_name[j], arg[j]); + if (j == (num_args - 1)) { + printf("\n"); + } + } 
+#endif + } +} + +#else /* TOBE_PORTEED */ +void spp_trace_logger(u16 error_code, u16 num_args, u32 *arg) +{ + /* To be filled */ +} + +void spp_trace_log_init(void) +{ + /* To be filled */ +} + +void init_trace_log_buf_pool(void) +{ + /* To be filled */ +} + +void spp_printf(u16 error_code, u16 num_args, u32 *arg) +{ + /* To be filled */ +} + +inline +u32 spp_trace_log_get_unix_time_in_seconds (void) +{ + vlib_main_t *vlib_main; + + vlib_main = vlib_get_main(); + return(vlib_time_now((vlib_main_t *) vlib_main)); +} + +#endif /* TOBE_PORTED */ + diff --git a/vnet/vnet/vcgn/spp_platform_trace_log.h b/vnet/vnet/vcgn/spp_platform_trace_log.h new file mode 100644 index 00000000000..cffe5a09956 --- /dev/null +++ b/vnet/vnet/vcgn/spp_platform_trace_log.h @@ -0,0 +1,357 @@ +/* + *------------------------------------------------------------------ + * spp_platform_trace_log.h + * + * Copyright (c) 2009-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __SPP_PLATFORM_TRACE_LOG_H__ +#define __SPP_PLATFORM_TRACE_LOG_H__ + +#include <stdio.h> +#include <vppinfra/vec.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> +#include <vppinfra/clib.h> + +#include "spp_ctx.h" +#include "spp_timers.h" + + +typedef enum { + SPP_LOG_LTRACE, + SPP_LOG_MAX +} spp_log_type_t; + +typedef struct { + u32 num_traces; +} spp_trace_log_hdr_t; + +typedef struct { + u16 error_code; + u16 num_args; + u32 arg[0]; +} spp_trace_log_t; + +#define DUMP_PKT_IDX 61 +#define OCTEON_SENSOR_READ 62 + +typedef enum { + CNAT_ERROR_SUCCESS, + CNAT_NO_CONFIG_ERROR, + CNAT_NO_VRF_RUN_ERROR, + CNAT_NO_POOL_FOR_ANY_ERROR, + CNAT_NO_PORT_FOR_ANY_ERROR, + CNAT_BAD_INUSE_ANY_ERROR, + CNAT_NOT_FOUND_ANY_ERROR, + CNAT_INV_PORT_FOR_DIRECT_ERROR, + CNAT_BAD_INUSE_DIRECT_ERROR, + CNAT_NOT_FOUND_DIRECT_ERROR, + CNAT_OUT_OF_PORT_LIMIT_ERROR, + CNAT_MAIN_DB_CREATE_ERROR, + CNAT_LOOKUP_ERROR, + CNAT_INDEX_MISMATCH_ERROR, + CNAT_PACKET_DROP_ERROR, + CNAT_INV_UNUSED_USR_INDEX, + CNAT_INVALID_VRFMAP_INDEX, + CNAT_USER_OUT_OF_PORTS, + CNAT_EXT_PORT_THRESH_EXCEEDED, + CNAT_EXT_PORT_THRESH_NORMAL, + CNAT_NO_EXT_PORT_AVAILABLE, + CNAT_SESSION_THRESH_EXCEEDED, + CNAT_SESSION_THRESH_NORMAL, + WQE_ALLOCATION_ERROR, + ERROR_PKT_DROPPED, + SYSMGR_PD_KEY_CREATION_ERROR, + SYSMGR_PD_SHMEM_ID_ERROR, + SYSMGR_PD_SHMEM_ATTACH_ERROR, + OCTEON_CKHUM_SKIPPED, + PK0_SEND_STATUS, + CMD_BUF_ALLOC_ERR, + SPP_CTX_ALLOC_FAILED, + SPP_MAX_DISPATCH_REACHED, + HA_SIGCHILD_RECV, + SIGACTION_ERR, + HA_INVALID_SEQ_OR_CONFIG_OR_TYPE, + NODE_CREATION_ERROR, + CNAT_CLI_INVALID_INPUT, /* new adds as part of CSCto04510, see sub codes below */ + CNAT_DUMMY_HANDLER_HIT, /* Has sub codes , see spp_dummy_handler_sub_cdes_t */ + CNAT_CONFIG_ERROR, /* has subcodes-see spp_config_error_sub_codes_t below */ + CNAT_NFV9_ERROR, /* Has sub codes see spp_nfv9_error_sub_codes_t below */ + CNAT_CMVX_TWSI_READ_WRITE_FAIL, /* Hassub codes see 
spp_cmvx_error_sub_codes_t */ + CNAT_TEMP_SENSOR_TIMEOUT, + CNAT_TEMP_SENSOR_DATA_MISMATCH, + CNAT_TEMP_SENSOR_CONFIG_FAILED, + HA_APP_NOT_RESPONDING, + HA_DATA_PATH_TEST_FAILED, + CNAT_WRONG_PORT_ALLOC_TYPE, + CNAT_NEW_PORT_ALLOC_ERROR, + CNAT_INVALID_INDEX_TO_FREE_PORT, + CNAT_DELETE_DB_ENTRY_NO_PORTMAP, + CNAT_MAIN_DB_LIMIT_ERROR, + CNAT_USER_DB_LIMIT_ERROR, + CNAT_FRAG_DB_ERROR, /* see spp_frag_db_error_sub_codes_t below */ + + DROP_PKT_DUMP, + CNAT_NAT64_SYSTEM_LIMIT_ERROR, + CNAT_ERROR_MAX +} spp_error_codes_t; + +typedef enum { + + TCP_MSS_INVALID_IVRF = 10, /* 1 param - vrf id */ + NFV9_LOG_INVALID_IP_OR_PORT = 20, /* 2 params - nfv9 server ip and port */ + NFV9_LOG_INVALID_PARAMS_OTHERS, /* 3 params, ref rate, time out, path mtu */ + NFV9_LOG_PATH_MTU_TOO_SMALL, /* 1 param, path mtu passed */ + NFV9_LOG_CANNOT_ADD_VRF_NOT_FOUND, /* 1 param, in vrf id */ + + VRF_MAP_ADDR_POOL_START_ADDR_GT_END_ADDR = 30, /* 2 params, start and end addr */ + VRF_MAP_ADDR_POOL_ADDR_POOL_TOO_LARGE, /* 2 params, start and end addr */ + VRF_MAP_ADDR_POOL_INVALID_IN_OR_OUT_VRF, /* 2 params, in vrf and out vrf */ + VRF_MAP_ADDR_POOL_TOO_LARGE_FOR_CORE, /* 2 params, pool size, core instance */ + VRF_MAP_DEL_POOL_START_ADDR_GT_END_ADDR, /* 2 params, start and end addr */ + VRF_MAP_DEL_POOL_ADDR_POOL_NOT_FOUND, /* 2 params, start and end addr */ + VRF_MAP_DEL_POOL_VRF_MAP_EMPTY, /* 2 params, start and end addr */ + + ADD_SVI_ADDR_INVALID_VRF = 40, /* 2 params, vrf passed and ipv4 addr */ + ADD_SVI_INDEX_INVALID_VRF, /* 2 params, vrf, uidb_index */ + + MAPPED_STAT_PORT_INVALID_OUTPUT_PARAMS = 50, + /* 3 params, out vrf, out ip, out port */ + MAPPED_STAT_PORT_UDP_PORT_POLARITY_MISMATCH, /* 2 params, in port and out port */ + MAPPED_STAT_PORT_IN_VRF_MAP_EMPTY, /* 1 param, in vrf id passed */ + MAPPED_STAT_PORT_VRF_MAP_NOT_IN_S_RUN, /* 1 param, vrf map status */ + MAPPED_STAT_PORT_INVALID_OUT_VRF_ID, /* 1 param, out vrf id passed */ + MAPPED_STAT_PORT_FAILED_TO_ADD_STAT_PORT, /* 4 params, in vrf, in ip, in port, error code */ + + STAT_PORT_INVALID_IN_PARAMS = 60, /* 4 params, in vrf, in ip, in port, proto */ + STAT_PORT_FAILED_TO_ADD_STAT_PORT, /* 4 params, in vrf, in ip, in port, error code */ + STAT_PORT_CONFIG_IN_USE, /* 4 params, in vrf, in ip, in port, proto */ + + DEL_STAT_PORT_IN_VRF_MAP_EMPTY = 70, /* 1 param, in vrf id passed */ + DEL_STAT_PORT_INVALID_IN_PARAMS, /* 4 params, in vrf, in ip, in port, proto */ + DEL_STAT_PORT_CANNOT_DELETE_NO_ENTRY, /* 4 params, in vrf, in ip, in port, proto */ + DEL_STAT_PORT_CANNOT_DELETE_NOT_STATIC_PORT, /* 4 params, in vrf, in ip, in port, proto*/ + + XLAT_SVI_CFG_INVALID_INDEX = 80, /* 1 param - uidb_index */ + XLAT_WRONG_V6_PREFIX_MASK, /* 1 param - v6 prefix mask */ + XLAT_INVALID_XLAT_ID_ERROR, /* 1 param - id */ + + V6RD_INVALID_6RD_ID_ERROR = 90, /*1 param - id */ + MAPE_INVALID_MAPE_ID_ERROR = 100 /* param - id */ +} spp_config_error_sub_codes_t; + +typedef enum { + CONFIG_DUMMY, + CONFIG_DUMMY_MAX, + SHOW_DUMMY, + SHOW_DUMMY_MAX, + DEBUG_DUMMY, + DEBUG_DUMMY_MAX +} spp_dummy_handler_sub_cdes_t; + +typedef enum { + CMVX_READ, + CMVX_WRITE +} spp_cmvx_error_sub_codes_t; + +typedef enum { + FRAG_DB_INVALID_BUCKET, + FRAG_DB_NO_ENTRY +} spp_frag_db_error_sub_codes_t; + +typedef enum { + CLI_INVALID_PAYLOAD_SIZE, + CLI_INVALID_MSG_ID +} spp_cli_error_sub_codes_t; + +typedef enum { + NFV9_DOWNSTREAM_CONGESTION, + NFV9_FAILED_TO_CREATE_CONTEXT +} spp_nfv9_error_sub_codes_t; + +typedef struct spp_cnat_logger_tbl_t_ { + u16 error_code; // The thread id stored by 
software + u16 num_args; + u16 rate_limit_time; // If we need to rate_limit logging + u8 param_name[7][32];// Parameter name for debug purposes +} spp_cnat_logger_tbl_t; + +extern spp_cnat_logger_tbl_t spp_cnat_logger_table[]; + +/* + * This corresponds to the length of the IMETRO SHIM Header for RODDICK + * For non-roddick cases, introduce an Ethernet header as well + */ +#if defined(RODDICK) +#define SPP_TRACE_LOG_SHIM_HDR_OFFSET 8 +#define SPP_TRACE_LOG_ENCAPS_OFFSET 0 +#else +#define SPP_TRACE_LOG_SHIM_HDR_OFFSET 0 +#define SPP_TRACE_LOG_ENCAPS_OFFSET 16 +#endif + +#define SPP_LOG_TRACE_HEADER_LENGTH \ + (sizeof(spp_trace_log_hdr_t)) + + +#define SPP_TRACE_LOG_IP_HDR_OFFSET \ + (SPP_TRACE_LOG_ENCAPS_OFFSET + \ + SPP_TRACE_LOG_SHIM_HDR_OFFSET) + + +#define SPP_TRACE_LOG_UDP_HDR_OFFSET \ + (SPP_TRACE_LOG_IP_HDR_OFFSET + sizeof(ipv4_header)) + +#define SPP_TRACE_LOG_HDR_OFFSET \ + (SPP_TRACE_LOG_UDP_HDR_OFFSET + sizeof(udp_hdr_type_t)) + +#define SPP_TRACE_LOG_RECORD_LENGTH 4 + +/* + * Let us put the maximum length of the log data to be 1400 + */ +#define SPP_TRACE_LOG_MAX_PKT_LENGTH 800 + +/* Structures and defines to store log info for MSC */ +#define SPP_TRACE_LOG_INVALID_LOGGING_INDEX 0xffffffff + +/* + * This structure stores the Logging information on per LOG TYPE + * basis. This structure is allocated from a pool and index + * to this structure based on log type + */ +typedef struct { + /* + * This field determines the maximum size of the Netflow V9 information + * that can be stored in a logging packet + */ + u16 max_length_minus_max_record_size; + + u32 sequence_num; /* Sequence number of the logging packet */ + u32 last_pkt_sent_count; + u16 pkt_length; /* Length of the currently NFv9 information */ + u16 log_record_length; /* Length of add record */ + u16 total_record_length; /* number of trace records */ + u16 total_record_count; + spp_log_type_t log_type; + /* + * current logging context + */ + spp_ctx_t *current_logging_context; + + /* + * Timestamp in UNIX seconds corresponding to when the current + * logging packet was created + */ + u32 current_logging_context_timestamp; + + /* + * Queued logging context waiting to be sent to the l3 infra node + */ + spp_ctx_t *queued_logging_context; + + /* + * Headers corresponding to various records in this + * current nfv9 logging context + */ + spp_trace_log_t *log_record; + spp_trace_log_hdr_t *log_header; + u8 *next_data_ptr; + +} spp_trace_log_info_t; + +typedef struct { + /* + * spp_ctx_alloc() call failed + */ + u64 spp_trace_log_context_creation_fail_count; + + /* + * Cannot send the existing logging pkt, so cannot create + * any additional packets for logging purposes + */ + u64 spp_trace_log_context_creation_deferred_count; + + /* + * Cannot send the existing logging pkt due to cnat_rewrite_output + * superframe being full. + */ + u64 spp_trace_log_downstream_constipation_count; +} spp_global_counters_t; + + +/* + * Global structure for SPP LOGS + */ +typedef struct { + /* A timer structure to periodically send log packets + * that have been waiting to be full for a long time. This will + * ensure event/error logs don't get delayed too much before they + * are sent to the MSC. 
+ */ + spp_timer_t log_timer; + + /* + * Node index corresponding to the infra L3 output node + * to which the nfv9 logging node will send the packet + */ + u16 spp_trace_log_disp_node_index; + + /* + * Whether we have initialized the NFv9 information + */ + u8 spp_trace_log_init_done; + + /* + * pool index in global pool based on log type + */ + u32 spp_log_pool_index[SPP_LOG_MAX]; + +} spp_trace_log_global_info_t; + + +extern spp_timer_t sensor_timer; +extern spp_trace_log_info_t spp_default_trace_log_info; +extern spp_trace_log_info_t *spp_trace_log_info_pool; + +extern spp_trace_log_global_info_t spp_trace_log_global_info; + +void spp_trace_logger(u16 error_code, u16 num_args, u32 *arg); +void spp_trace_log_init(void); +void init_trace_log_buf_pool(void); +void spp_printf(u16 error_code, u16 num_args, u32 *arg); + +/* + * The following 2 functions are temporary hacks until + * we have RTC support from the PD nodes + */ +inline +u32 spp_trace_log_get_sys_up_time_in_ms (void); + +inline +u32 spp_trace_log_get_unix_time_in_seconds (void); + +enum { + TEMPERATURE_SENSOR_TEST_MODE, + TEMPERATURE_SENSOR_QUIET_MODE, +}; + +extern int temperature_read_blocked; + +void read_octeon_sensors(u8 mode); +void Init_temperature_sensors(); +#endif /* __SPP_PLATFORM_TRACE_LOG_H__ */ diff --git a/vnet/vnet/vcgn/spp_timers.h b/vnet/vnet/vcgn/spp_timers.h new file mode 100644 index 00000000000..afb0147b2ed --- /dev/null +++ b/vnet/vnet/vcgn/spp_timers.h @@ -0,0 +1,139 @@ +/* + *------------------------------------------------------------------ + * spp_timers.h + * + * Copyright (c) 2008-2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#ifndef __SPP_TIMERS_H__ +#define __SPP_TIMERS_H__ + + +typedef struct d_list_el_ { + struct d_list_el_ *next; + struct d_list_el_ *prev; +} d_list_el_t; + +/* + * d_list_init + */ + +static inline void d_list_init (d_list_el_t *headp) +{ + headp->prev = headp->next = headp; +} + +/* + * d_list_init - add at head of list + */ + +static inline void d_list_add_head (d_list_el_t *headp, + d_list_el_t *elp) +{ + ASSERT(elp->prev == elp); /* multiple enqueue, BAD! */ + ASSERT(elp->next == elp); + + elp->next = headp->next; + headp->next = elp; + elp->prev = elp->next->prev; + elp->next->prev = elp; +} + +/* + * d_list_add_tail - add element at tail of list + */ +static inline void d_list_add_tail (d_list_el_t *headp, + d_list_el_t *elp) +{ + ASSERT(elp->prev == elp); /* multiple enqueue, BAD! 
*/ + ASSERT(elp->next == elp); + + headp = headp->prev; + + elp->next = headp->next; + headp->next = elp; + elp->prev = elp->next->prev; + elp->next->prev = elp; +} + +/* + * d_list_rem_head - removes first element from list + */ +static inline d_list_el_t *d_list_rem_head (d_list_el_t *headp) +{ + d_list_el_t *elp; + + elp = headp->next; + if (elp == headp) + return (NULL); + headp->next = elp->next; + elp->next->prev = elp->prev; + + elp->next = elp->prev = elp; + return (elp); +} + +/* + * d_list_rem_elem - removes specific element from list. + */ +static inline void d_list_rem_elem (d_list_el_t *elp) +{ + d_list_el_t *headp; + + headp = elp->prev; + + headp->next = elp->next; + elp->next->prev = elp->prev; + elp->next = elp->prev = elp; +} + +#define TIMER_BKTS_PER_WHEEL 128 /* power of 2, please */ +#define TIMER_NWHEELS 4 + +typedef struct { + i32 curindex; /* current index for this wheel */ + d_list_el_t *bkts; /* vector of bucket listheads */ +} spp_timer_wheel_t; + + +typedef struct { + u64 next_run_ticks; /* Next time we expire timers */ + spp_timer_wheel_t **wheels; /* pointers to wheels */ +} spp_timer_axle_t; + + +typedef struct { + d_list_el_t el; + u16 cb_index; + u16 flags; + u64 expires; +} spp_timer_t; + +#define SPP_TIMER_RUNNING 0x0001 + + +/* + * prototypes + */ +void spp_timer_set_ticks_per_ms(u64); +void spp_timer_axle_init (spp_timer_axle_t *ta); +void spp_timer_expire(spp_timer_axle_t *ta, u64 now); +void spp_timer_final_init(void); + +void spp_timer_start(spp_timer_t *tp); +void spp_timer_start_axle(spp_timer_axle_t *ta, spp_timer_t *tp); +void spp_timer_stop(spp_timer_t *tp); +u16 spp_timer_register_callback (void (*fp)(spp_timer_t *)); + +#endif /* __SPP_TIMERS_H__ */ diff --git a/vnet/vnet/vcgn/tcp_header_definitions.h b/vnet/vnet/vcgn/tcp_header_definitions.h new file mode 100644 index 00000000000..02920bcc8ee --- /dev/null +++ b/vnet/vnet/vcgn/tcp_header_definitions.h @@ -0,0 +1,1582 @@ +/* + *----------------------------------------------------------------------------- + * + * Filename: tcp_header_definitions.h + * + * Description: Layer 2, 3, 4 definitions and header types + * + * Assumptions and Constraints: + * + * Copyright (c) 2012-2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *----------------------------------------------------------------------------- + */ + +#ifndef __TCP_HEADER_DEFINITIONS_H__ +#define __TCP_HEADER_DEFINITIONS_H__ + +/* + * A general list of Layer 3 protocols, used by many Layer 2 encaps. 
+ * + * formerly: + * TYPE_IP TYPE_IP10MB + * TYPE_ARP TYPE_RFC826_ARP + * TYPE_RARP TYPE_REVERSE_ARP + * TYPE_MPLS TYPE_TAGSWITCH + */ +#define TYPE_IP 0x0800 +#define TYPE_IP_V6 0x86DD +#define TYPE_ARP 0x0806 +#define TYPE_RARP 0x8035 +#define TYPE_MPLS 0x8847 +#define TYPE_CDP 0x2000 +#define TYPE_CGMP 0x2001 +#define TYPE_LACP 0x8808 /* 802.3ad */ +#define TYPE_CLNS 0xFEFE + +#define TYPE_PPPOE_SESSION 0x8864 /* PTA plus */ +#define TYPE_PPPOE_DISCOVERY 0x8863 /* PTA plus */ + +/* + * for atm arp handling + */ +#define IN_ATM_ARP_BIT 0x0008 + +/* + * The Layer 2 header structures. + */ + + +/* +** HDLC +*/ + +typedef struct hdlc_hdr_type { + u16 addr; + u16 type; + u8 data[0]; +} hdlc_hdr_type; + +#define HDLC_ADDR_CMD 0x0F00 +#define HDLC_HDR_LEN 4 +#define HDLC_BROADCAST_BIT 31 +#define TYPE_KEEP 0x8035 + +#define HDLC_CLNS (HDLC_ADDR_CMD<<16|TYPE_CLNS) +#define HDLC_CDP (HDLC_ADDR_CMD<<16|TYPE_CDP) +#define HDLC_MPLS (HDLC_ADDR_CMD<<16|TYPE_MPLS) +#define HDLC_IP (HDLC_ADDR_CMD<<16|TYPE_IP) +#define HDLC_IP_V6 (HDLC_ADDR_CMD<<16|TYPE_IP_V6) +#define HDLC_KEEPALIVE_CMD (HDLC_ADDR_CMD<<16|TYPE_KEEP) + +/* +** PPP +*/ + +typedef struct ppp_comp_hdr_type { + union { + u8 ppp_u8[4]; + u16 ppp_u16[2]; + u32 ppp_u32; + } ppp_comp_u; +} ppp_comp_hdr_type; + +#define PPP_STATION 0xFF03 +#define PPP_STATION_LEN 0x2 +#define PPP_ENDPROTO 0x01 +#define PPP_NOT_ENDPROTO 0xfffffffe +#define PPP_CONTROL_PROTOCOL_MASK 0x8000 +#define PPP_CONTROL_PROTOCOL_BIT 15 +#define PPP_CSCO_LEN 4 +#define PPP_RFC1661_LEN 2 +#define PPP_RFC1661_COMP_LEN 1 + +#define TYPE_PPP_IP 0x0021 +#define TYPE_PPP_IP_V6 0x0057 +#define TYPE_PPP_MPLS_UNICAST 0x0281 +#define TYPE_PPP_MPLS_CONTROL 0x8281 +#define TYPE_PPP_CLNS 0x0023 +#define TYPE_PPP_CDP 0x0207 + +#define TYPE_PPP_IPCP 0x8021 +#define TYPE_PPP_LCP 0xC021 +#define TYPE_PPP_PAP 0xC023 +#define TYPE_PPP_LQR 0xC025 +#define TYPE_PPP_CHAP 0xC223 + + +#define TYPE_PPP_LCP_ECHO_REQUEST 0x09 +/* +** MultiLink PPP +*/ + +#define MLPPP_FLAGS_FIELD_LEN 4 +#define MLPPP_BEGIN_MASK 0x80000000 +#define MLPPP_END_MASK 0x40000000 +#define MLPPP_BEGIN_END_MASK (MLPPP_BEGIN_MASK|MLPPP_END_MASK) +#define MLPPP_BEGIN_END_SHIFT 30 +#define MLPPP_SEQUENCE_NUM_MASK 0x00FFFFFF +#define MLPPP_MC_CLASS_ID_MASK 0x3C000000 +#define MLPPP_MC_CLASS_SHIFT 26 + +#define TYPE_PPP_MULTILINK 0x003D + +/* these are needed in the micro-code, for optimizations */ +#define TYPE_PPP_FULL_IP_4 0xff030021 +#define TYPE_PPP_FULL_IP_3 0xff0321 +#define TYPE_PPP_FULL_IP_2 0x0021 +#define TYPE_PPP_FULL_IP_1 0x21 + +#define MLPPP_BEGIN_END_MASK_BYTE 0xC0 +#define MLPPP_BEGIN_BIT 7 +#define MLPPP_END_BIT 6 +#define MLPPP_MC_CLASS_ID_MASK_BYTE 0x3C +#define MLPPP_MC_CLASS_ID_SHIFT_BYTE 2 + +#define MLPOA_BEGIN_END_SHIFT 24 + +/* +** Ethernet ARPA +*/ + + +typedef struct ethernet_arpa_hdr_type { + u8 daddr[6]; + u8 saddr[6]; + u16 type; + u8 data[0]; +} ethernet_arpa_hdr_type; + +typedef struct extension_802p3_type { + u16 type; + u8 ctl; + u8 data[0]; +} extension_802p3_type; + +typedef struct ethernet_802p3_hdr_type { + u8 daddr[6]; + u8 saddr[6]; + u16 length; + extension_802p3_type extension; +} ethernet_802p3_hdr_type; + + +typedef struct ethernet_vlan_802p3_hdr_type { + u8 daddr[6]; + u8 saddr[6]; + u16 type1; + u16 vlan_id; + u16 length; + extension_802p3_type extension; +} ethernet_vlan_802p3_hdr_type; + +#define MIN_ETHERNET_PKT_LEN 60 +#define MAX_ETHERNET_PKT_LEN 1500 +#define ETHERNET_ARPA_HDR_LEN 14 +#define ETHERNET_TYPE_FIELD_SIZE 2 + + +/* +** Ethernet 802.1q (VLAN) +*/ + +typedef struct 
ethernet_vlan_hdr_type { + u8 dest_addr[6]; + u8 src_addr[6]; + u16 type1; + u16 vlan_hdr; + u16 type2; + u8 data[0]; +} ethernet_vlan_hdr_type; + + +/* +** Ethernet 802.1.q-in-q (QinQ) +*/ + +typedef struct ethernet_qinq_hdr_type { + u8 dest_addr[6]; + u8 src_addr[6]; + u16 type1; + u16 vlan_hdr1; + u16 type2; + u16 vlan_hdr2; + u16 type3; + u8 data[0]; +} ethernet_qinq_hdr_type; + + +/* +** Ethernet 802.3ad EtherChannel control +*/ + +typedef struct ethernet_lacp_hdr_type { + u8 daddr[6]; + u8 saddr[6]; + u16 type; + u16 LAcmd; + u8 data[0]; +} ethernet_lacp_hdr_type; + + +/* +** Ethernet 802.1 Bridge (spanning tree) PDU +*/ + +typedef struct ethernet_bpdu_hdr_type { + u8 daddr[6]; + u8 saddr[6]; + u8 dsap; + u8 ssap; + u8 control; + u8 more[0]; +} ethernet_bpdu_hdr_type; + +#define ETH_BPDU_DSAP 0x42 +#define ETH_BPDU_SSAP 0x42 +#define ETH_BPDU_CONTROL 0x03 +#define ETH_BPDU_MATCH 0x424203 + + +/************************************************************/ +/* PTA PLUS ETHERNET ENCAPSULATIONS */ +/* + * PPPoEoARPA 20 bytes + */ +typedef struct ethernet_pppoe_arpa_hdr_type { + u8 daddr[6]; + u8 saddr[6]; + u16 type; + /* pppoe hdr at begining of enet payload */ + u16 vtc; /* version(4b), type(4b) and code(8b) fields */ + u16 sid; + u16 len; + u8 ppp_header[0]; /* PPP header start, no ff03 field present */ +} ethernet_pppoe_arpa_hdr_type; + +typedef struct pppoe_hdr_type { + /* pppoe hdr at begining of enet payload */ + u16 vtc; /* version(4b), type(4b) and code(8b) fields */ + u16 sid; + u16 len; + u8 ppp_header[0]; /* PPP header start, no ff03 field present */ +} pppoe_hdr_type; + +/* +** PPPoEoVLAN (802.1p or 802.1q) 24 bytes +*/ +typedef struct ethernet_pppoe_vlan_hdr_type { + u8 dest_addr[6]; + u8 src_addr[6]; + u16 type1; + u16 vlan_hdr; + u16 type2; + /* pppoe hdr at begining of enet payload */ + u16 vtc; /* version(4b), type(4b) and code(8b) fields */ + u16 sid; + u16 len; + u8 ppp_header[0]; /* PPP header start, no ff03 field present */ +} ethernet_pppoe_vlan_hdr_type; + +/* +** PPPoEoQinQ 28 bytes +*/ +typedef struct ethernet_pppoe_qinq_hdr_type { + u8 dest_addr[6]; + u8 src_addr[6]; + u16 type1; + u16 vlan_hdr1; + u16 type2; + u16 vlan_hdr2; + u16 type3; + /* pppoe hdr at begining of enet payload */ + u16 vtc; /* version(4b), type(4b) and code(8b) fields */ + u16 sid; + u16 len; + u8 ppp_header[0]; /* PPP header start, no ff03 field present */ +} ethernet_pppoe_qinq_hdr_type; + +#define ETH_PPPOE_ARPA_HDR_LEN sizeof(ethernet_pppoe_arpa_hdr_type) +#define ETH_PPPOE_VLAN_HDR_LEN sizeof(ethernet_pppoe_vlan_hdr_type) +#define ETH_PPPOE_QINQ_HDR_LEN sizeof(ethernet_pppoe_qinq_hdr_type) +#define PPPOE_HDR_LEN 6 +/* End PTA PLUS ETHERNET ENCAPSULATIONS */ +/****************************************************************/ + + + +#define TYPE_DOT1Q 0x8100 +#define DOT1Q_HDR_LEN 18 +#define DOT1Q_VLAN_ID_MASK 0x0FFF +#define DOT1Q_VLAN_ID_RES_0 0x0000 +#define DOT1Q_VLAN_ID_RES_4095 0x0FFF +#define DOT1Q_ARPA_INDEX DOT1Q_VLAN_ID_RES_0 + +#define TYPE_QINQ_91 0x9100 +#define TYPE_QINQ_92 0x9200 +#define TYPE_QINQ_88A8 0x88A8 +#define QINQ_HDR_LEN 22 + +/* + * 802.1p support + */ +#define DOT1P_VLAN_COS_MASK 0xE000 +#define DOT1P_VLAN_COS_SHIFT 13 +#define DOT1P_MAX_COS_VALUE 7 + +/* +** Frame Relay +*/ + +/* + * formerly: + * TYPE_FR_IETF_IPV4 ENCAPS_FR_IETF + * TYPE_FR_CISCO_IPV4 ENCAPS_FR_CISCO + * TYPE_FR_ISIS ENCAPS_FR_ISIS + * + * FR_LMI_DLCI_CISCO LMI_DLCI_CISCO + * FR_LMI_DLCI_IETF LMI_DLCI_ITUANSI + */ + +typedef struct frame_relay_hdr_type { + u16 address; + u16 control_nlpid; 
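+    /* For IETF (RFC 2427) encaps this holds the UI control octet (0x03)
+     * plus the NLPID; for Cisco encaps it holds the EtherType.
+     * See the TYPE_FR_* combined values defined below. */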
+ u8 data[0]; +} frame_relay_hdr_type; + +typedef struct fr_snap_hdr_type { + u16 address; + u8 control; + u8 pad; + u8 nlpid; + u8 oui[3]; + u16 protocol_id; +} fr_snap_hdr_type; + +#define FR_ADDR_LEN 2 +#define FR_CTL_NLPID_LEN 2 +#define FR_HDR_LEN (FR_ADDR_LEN+FR_CTL_NLPID_LEN) + +/* + * These defines are for the FR-SNAP header. + * The SNAP header is set up solely so that we can + * identify ARP packets, which look like this: + * + * control pad nlpid oui protocol_id + * 03 00 80 00 00 00 0806 + */ +#define FR_ARP_CONTROL 0x03 +#define FR_ARP_PAD 0x00 +#define FR_ARP_NLPID 0x80 +#define FR_ARP_OUI_0 0x00 +#define FR_ARP_OUI_1 0x00 +#define FR_ARP_OUI_2 0x00 +/* + * these are used only in the tmc code + */ +#define FR_NLPID_OUI_LEN 4 +#define FR_ARP_CONTROL_PAD 0x0300 +#define FR_ARP_NLPID_OUI 0x80000000 + + +#define FR_DLCI_UPPER_MASK 0xFC00 +#define FR_DLCI_UPPER_SHIFT 6 +#define FR_DLCI_LOWER_MASK 0x00F0 +#define FR_DLCI_LOWER_SHIFT 4 + +/* + * Defines for converting a DLCI for insertion into a synthesized FR address + * field for FRoMPLS disposition. + + * bit 8 7 6 5 4 3 2 1 + * +-------------------------------+ + * | Flag | + * | 0 1 1 1 1 1 1 0 | + * +-------------------------------+ + * | Upper DLCI |C/R| 0 | + * +-------------------------------+ + * | Lower DLCI | F | B | DE| 1 | + * +-------------------------------+ + * | | + * :Frame relay information field : + * : (i.e.payload) : + * | | + * +-------------------------------+ + * | FCS (2 or 4 octets) | + * | | + * +-------------------------------+ + * | Flag | + * | 0 1 1 1 1 1 1 0 | + * +-------------------------------+ + * + * a-With 10 bits for the DLCI + */ +#define FR_DLCI_TO_HDR_UPPER_MASK 0x3f0 +#define FR_DLCI_TO_HDR_UPPER_SHIFT (10-4) +#define FR_DLCI_TO_HDR_LOWER_MASK 0xf +#define FR_DLCI_TO_HDR_LOWER_SHIFT 4 + +#define TYPE_FR_IETF_IP 0x03CC +#define TYPE_FR_IETF_IP_V6 0x038E +#define TYPE_FR_CISCO_IP 0x0800 +#define TYPE_FR_CISCO_IP_V6 0x86DD +#define TYPE_FR_ISIS 0x0383 +#define TYPE_FR_SNAP0PAD 0x0380 +#define TYPE_FR_SNAP1PAD 0x0300 +#define TYPE_FR_FRF12 0x03B1 +#define TYPE_FR_MLP 0x03CF +#define TYPE_FR_EEK 0x8037 + +#define FR_LMI_DLCI_CISCO 1023 +#define FR_LMI_DLCI_IETF 0 + +#define FR_NOT_NOT_NOT 0 +#define FR_NOT_NOT_DE 1 +#define FR_NOT_BECN_NOT 2 +#define FR_NOT_BECN_DE 3 +#define FR_FECN_NOT_NOT 4 +#define FR_FECN_NOT_DE 5 +#define FR_FECN_BECN_NOT 6 +#define FR_FECN_BECN_DE 7 + +#define FR_FECN_BECN_DE_MASK 0x000E +#define FR_FECN_BECN_DE_SHIFT 1 + +/* Address field extension bit for standard 2-byte FR address field */ +#define FR_EA1_MASK 0x0001 +#define FR_EA1_MASK_BIT 0 + +/* + * these are needed in the micro-code, for optimizations + */ + +/* the bit position (in the address field) of the LSB of the DLCI */ +#define FR_DLCI_LS_BIT 4 + + +/* +** +** MultiLink Frame Relay +** +*/ + +typedef struct mlfr_hdr_type { + u16 frag_hdr; + u16 address; + u16 control_nlpid; + u8 data[0]; +} mlfr_hdr_type; + +/* + * LIP frames have B, E and C set--the other + * bits in the frag_hdr field are irrelevant. + * + * NOTE: Injected LIP packets have a frag_hdr of 0xE100. 
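+ *       (B and E correspond to MLFR_BEGIN_MASK / MLFR_END_MASK defined below.)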
+ * + */ +#define MLFR_LIP_FRAME 0xE100 +#define MLFR_LIP_MASK 0xE000 +#define MLFR_FRAG_HDR_LEN 2 + +#define MLFR_BEGIN_MASK 0x8000 +#define MLFR_END_MASK 0x4000 +#define MLFR_BEGIN_END_MASK (MLFR_BEGIN_MASK|MLFR_END_MASK) +#define MLFR_BEGIN_END_SHIFT 14 + +#define MLFR_SEQ_NUM_HI_MASK 0x1E00 +#define MLFR_SEQ_NUM_HI_SHIFT 1 +#define MLFR_SEQ_NUM_LO_MASK 0x00FF + +/* + * these are needed in the micro-code, for optimizations + */ +#define MLFR_BEGIN_END_MASK_BYTE 0xC0 + + +/* + * FRF.12 definitions + */ +typedef struct frf12_hdr_type_ { + u16 address; + u16 control_nlpid; + u16 frag_hdr; + u8 data[0]; +} frf12_hdr_type; + +#define FRF12_FRAG_HDR_LEN sizeof(frf12_hdr_type) + +#define FRF12_BEGIN_MASK 0x8000 +#define FRF12_END_MASK 0x4000 +#define FRF12_BEGIN_END_MASK (FRF12_BEGIN_MASK|FRF12_END_MASK) +#define FRF12_BEGIN_END_SHIFT 8 + +#define FRF12_SEQ_NUM_HI_MASK 0x1E00 +#define FRF12_SEQ_NUM_HI_SHIFT 1 +#define FRF12_SEQ_NUM_LO_MASK 0x00FF +#define FRF12_BEGIN_END_MASK_BYTE 0xC0 + + + +/* +** +** MLP over Frame Relay +** The ppp hdr can be either a +** an MLP hdr or a PPP hdr +** +** MLP can be compressed or not: +** a) 0xff03003d +** b) 0x003d +** c) 0x3d +** followed by: +** 1 byte with begin/end bits +** 3 bytes of a sequence # +** +** PPP can be also be compressed or not. +** Only these will be fwded: +** a) 0xff030021 +** b) 0xff0321 +** c) 0x0021 +** d) 0x21 +** +** +*/ +typedef struct mlpofr_hdr_type { + u16 address; + u16 control_nlpid; + u8 ppp_header[0]; +} mlpofr_hdr_type; + +/* +** ATM - +*/ + +/* + * channel_handle is defined as follows: + * + * bits 15 = reserved (must be 0) + * bits 14 - 0 = channel handle + * + * + * flags is a bitfield defined as follows: + * + * bits 15 - 13 = proto (PPPoA RFC1661 = 0, + * PPPoE = 1, + * RBE = 2, + * PPPoA Cisco = 3, + * MLPoATM RFC1661 = 4, + * MLPoATM Cisco = 5, + * Reserved = 6-7) + * bit 12 = encap (MUX=0, + * SNAP=1) + * bits 11 - 6 = reserved (must be 0) + * bits 5 - 3 = pkt_type (AAL5 pkt = 0, + * Raw cell (includes F4 OAM) = 1, + * F5 segment OAM cell = 2 + * F5 end-to-end OAM cell = 3 + * Reserved = 4-7) + * bit 2 = EFCI (congestion indication) + * bit 1 = reserved (must be 0) + * bit 0 = CLP (cell loss priority) + */ + +typedef struct apollo_atm_generic_hdr_type { + u16 channel_handle; + u16 flags; +} apollo_atm_generic_hdr_type; + +typedef struct apollo_atm_aal5_snap_hdr_type { + u16 channel_handle; + u16 flags; + u8 dsap; + u8 ssap; + u8 control; + u8 oui[3]; + u16 type; + u8 data[0]; +} apollo_atm_aal5_snap_hdr_type; + +typedef struct atm_aal5_snap_hdr_type { + u8 dsap; + u8 ssap; + u8 control; + u8 oui[3]; + u16 pid; + u16 pad; + u8 data[0]; +} atm_aal5_snap_hdr_type; + + +typedef struct apollo_atm_aal5_snap_hdr1_type { + u16 channel_handle; + u16 flags; + u8 dsap; + u8 ssap; + u8 control; + u8 oui0; + u8 oui1; + u8 oui2; + u16 type; + u8 data[0]; +} apollo_atm_aal5_snap_hdr1_type; + +typedef struct apollo_atm_aal5_clns_hdr_type { + u16 channel_handle; + u16 flags; + u16 type; + u16 data; +} apollo_atm_aal5_clns_hdr_type; + +typedef struct apollo_atm_aal5_ilmi_hdr_type { + u16 channel_handle; + u16 flags; + u8 data[0]; +} apollo_atm_aal5_ilmi_hdr_type; + +typedef struct apollo_atm_aal5_mux_hdr_type { + u16 channel_handle; + u16 flags; + u8 data[0]; +} apollo_atm_aal5_mux_hdr_type; + +typedef struct apollo_atm_oam_f4_hdr_type { + u16 channel_handle; + u16 flags; + /* + * gcf_vpi_vci_pt_clp is a bitfield defined as follows: + * + * bits 31 - 28 = GCF + * bits 27 - 20 = VPI + * bits 19 - 4 = VCI + * bits 3 - 1 = PT + * bit 0 
= CLP + */ + u32 gcf_vpi_vci_pt_clp; + u8 data[0]; +} apollo_atm_oam_f4_hdr_type; + +#define APOLLO_ATM_OAM_F4_HDR_PT_MASK 0xE +#define APOLLO_ATM_OAM_F4_HDR_PT_SHIFT 1 + +typedef struct apollo_atm_oam_f5_hdr_type { + u16 channel_handle; + u16 flags; + u8 data[0]; +} apollo_atm_oam_f5_hdr_type; + +#define APOLLO_IRONBUS_EXT_LESS_PROTO 0xFFFF0FFF +#define APOLLO_CHANNEL_HANDLE_MASK 0xFFFF +#define APOLLO_PKT_TYPE_MASK 0x0038 +#define APOLLO_PKT_TYPE_SHIFT 3 +#define APOLLO_FLAG_CLP_MASK 0x0001 +#define APOLLO_FLAG_CLP_BIT 0 + +#define APOLLO_CHANNEL_HANDLE_RES_0 0x0000 +/* + * The 1 byte HEC field is removed by the line card. + */ +#define APOLLO_F4_RX_CELL_SIZE 52 +#define APOLLO_F5_RX_CELL_SIZE 52 + +#define APOLLO_ATM_PACKET_TYPE_AAL5 0 +#define APOLLO_ATM_PACKET_TYPE_F4 1 +#define APOLLO_ATM_PACKET_TYPE_F5_SEG 2 +#define APOLLO_ATM_PACKET_TYPE_F5_E_TO_E 3 +#define APOLLO_ATM_PACKET_TYPE_4 4 +#define APOLLO_ATM_PACKET_TYPE_5 5 +#define APOLLO_ATM_PACKET_TYPE_6 6 +#define APOLLO_ATM_PACKET_RESERVED 7 + +#define APOLLO_AAL5_MUX_IP_HDR_LEN 4 +#define APOLLO_AAL5_SNAP_HDR_LEN 12 + +#define APOLLO_RCV_IRON_BUS_EXT_LEN 4 +#define APOLLO_TX_IRON_BUS_EXT_LEN 8 + +/* + * MLPoA type definitions + */ +#define MLPOA_CISCO_HDR 0xFF03 +#define MLPOA_SNAP_HDR_LEN 4 +#define MLPOA_CISCO_HDR_LEN 2 + +/************************************************************/ +/* PTA PLUS ATM ENCAPSULATIONS */ + +/* RBE header 28 bytes*/ +typedef struct apollo_atm_aal5_llcsnap_rbe_hdr_type { + u16 channel_handle; + u16 flags; + u8 dsap; + u8 ssap; + u8 control; + u8 oui[3]; + u16 pid; + u16 pad; + /* enet header within */ + u8 daddr[6]; + u8 saddr[6]; + u16 type; + u8 data[0]; /* start of IP */ +} apollo_atm_aal5_llcsnap_rbe_hdr_type; + +/* PPPoEoA header 34 bytes*/ +typedef struct apollo_atm_aal5_llcsnap_pppoe_hdr_type { + u16 channel_handle; + u16 flags; + u8 dsap; + u8 ssap; + u8 control; + u8 oui[3]; + u16 pid; + u16 pad; + /* enet header within */ + u8 daddr[6]; + u8 saddr[6]; + u16 type; + /* pppoe hdr at begining of enet payload */ + u16 vtc; /* version(4b), type(4b) and code(8b) fields */ + u16 sid; + u16 len; + u8 ppp_header[0]; /* PPP header start, no ff03 field present */ +} apollo_atm_aal5_llcsnap_pppoe_hdr_type; + + +/* PPPoA MUX 4 bytes*/ +typedef struct apollo_atm_aal5_mux_pppoa_hdr_type { + u16 channel_handle; + u16 flags; + u8 ppp_header[0]; +} apollo_atm_aal5_mux_pppoa_hdr_type; + + +/* PPPoA SNAP LLC 8 bytes */ +typedef struct apollo_atm_aal5_llcsnap_pppoa_hdr_type { + u16 channel_handle; + u16 flags; + u8 dsap; + u8 ssap; + u8 control; + u8 nlpid; + u8 ppp_header[0]; +} apollo_atm_aal5_llcsnap_pppoa_hdr_type; + +/* MLPoA MUX (generic) */ +typedef struct apollo_atm_aal5_mux_mlpoa_hdr_type { + u16 channel_handle; + u16 flags; + u8 ppp_header[0]; +} apollo_atm_aal5_mux_mlpoa_hdr_type; + +/* MLPoA SNAP LLC */ +typedef struct apollo_atm_aal5_llcsnap_mlpoa_hdr_type { + u16 channel_handle; + u16 flags; + u8 dsap; + u8 ssap; + u8 control; + u8 nlpid; + u8 ppp_header[0]; +} apollo_atm_aal5_llcsnap_mlpoa_hdr_type; + + +#define PPPOA_SNAPLLC_HDR_LEN sizeof(apollo_atm_aal5_llcsnap_pppoa_hdr_type) +#define PPPOA_MUX_HDR_LEN sizeof(apollo_atm_aal5_mux_pppoa_hdr_type) +#define PPPOE_SNAPLLC_HDR_LEN sizeof(apollo_atm_aal5_llcsnap_pppoe_hdr_type) +#define RBE_SNAPLLC_HDR_LEN sizeof(apollo_atm_aal5_llcsnap_rbe_hdr_type) + +/* End PTA PLUS ATM ENCAPSULATIONS */ +/****************************************************************/ + +#define LLCSNAP_PID_DOT3_NOFCS 0x0007 + +/* +** the SNAP header +*/ + +/* + * Note that 
some of these definitions are split + * up along certain word or half word boundaries + * to help expediate the TMC code. + */ +#define LLC_SNAP_HDR_DSAP 0xAA +#define LLC_SNAP_HDR_SSAP 0xAA +#define LLC_SNAP_HDR_CONTROL 0x03 +#define LLC_SNAP_HDR_OUI_0 0x00 +#define LLC_SNAP_HDR_OUI_1 0x00 +#define LLC_SNAP_HDR_OUI_2 0x00 +#define LLC_SNAP_HDR_OUI_2_CDP 0x0C + +#define LLC_SNAP_HDR_DSAP_SSAP 0xAAAA +#define LLC_SNAP_HDR_DSAP_SSAP_CTRL_OUI0 0xAAAA0300 +#define LLC_SNAP_HDR_CONTROL_OUI 0x03000000 +#define LLC_SNAP_HDR_OUI1_OUI2_CDP 0x000C2000 + + + +/* +** SRP +*/ + +/* + * The v2_gen_hdr is a 2-byte field that contains the following: + * + * [ ttl | ring_id | mode | priority | parity ] + * bits 8 1 3 3 1 + */ +typedef struct srp_hdr_type { + u16 v2_gen_hdr; + u8 dest_addr[6]; + u8 src_addr[6]; + u16 protocol; + u8 data[0]; +} srp_hdr_type; + +#define SRP_HDR_LEN 16 + +#define SRP_IB_CHANNEL_CONTROL 0x0000 +#define SRP_IB_CHANNEL_DATA_HI 0x0001 +#define SRP_IB_CHANNEL_DATA_LO 0x0002 + +#define SRP_RING_ID_MASK 0x0080 +#define SRP_RING_ID_BIT 7 + +#define SRP_MODE_BITS_MASK 0x0070 +#define SRP_MODE_BITS_SHIFT 4 +#define SRP_MODE_CONTROL_TOPOLOGY 4 +#define SRP_MODE_CONTROL_IPS 5 +#define SRP_MODE_DATA 7 + +#define SRP_PRIORITY_BITS_MASK 0x000E +#define SRP_PRIORITY_BITS_SHIFT 1 +#define SRP_PRIORITY_HIGH 7 +#define SRP_PRIORITY_PAK_PRIORITY 6 + +/* this is for the tmc code */ +#define SRP_INV_PRIORITY_BITS_MASK 0xFFF1 + +#define SRP_PROT_CONTROL_TOPOLOGY 0x2007 +#define SRP_PROT_CONTROL_IPS 0x2007 + +/* this is for the tmc code */ +#define SRP_TRUE 1 +#define SRP_FALSE 0 + +/* +** MPLS +*/ +#define MPLS_EOS_BIT 0x00000100 +#define MPLS_EOS_SHIFT 8 +#define MPLS_LABEL_SIZE 4 +#define MAX_MPLS_LABEL_STACK 6 +#define MPLS_LABEL_MASK 0xfffff000 +#define MPLS_LABEL_SHIFT 12 +#define MPLS_TTL_MASK 0x000000ff +#define MPLS_EXP_MASK 0x00000e00 +#define MPLS_EXP_SHIFT 9 +#define MPLS_EXP_TTL_MASK 0x00000eff + + + +typedef union _layer2 { + hdlc_hdr_type hdlc; + ppp_comp_hdr_type ppp; + ethernet_arpa_hdr_type eth_arpa; + ethernet_vlan_hdr_type eth_vlan; + ethernet_qinq_hdr_type eth_qinq; + ethernet_lacp_hdr_type eth_lacp; + ethernet_bpdu_hdr_type eth_bpdu; + ethernet_802p3_hdr_type eth_802p3; + ethernet_vlan_802p3_hdr_type eth_vlan_802p3; + ethernet_pppoe_arpa_hdr_type eth_pppoe_arpa; /* PTA plus */ + ethernet_pppoe_vlan_hdr_type eth_pppoe_vlan; /* PTA plus */ + ethernet_pppoe_qinq_hdr_type eth_pppoe_qinq; /* PTA plus */ + frame_relay_hdr_type frame_relay; + fr_snap_hdr_type fr_snap; + mlfr_hdr_type mlfr; + mlpofr_hdr_type mlpofr; + frf12_hdr_type frf12; + apollo_atm_generic_hdr_type atm_generic; + apollo_atm_aal5_snap_hdr_type atm_aal5_snap; + apollo_atm_aal5_snap_hdr1_type atm_aal5_snap1; + apollo_atm_aal5_clns_hdr_type atm_aal5_clns; + apollo_atm_aal5_ilmi_hdr_type atm_aal5_ilmi; + apollo_atm_aal5_mux_hdr_type atm_aal5_mux; + apollo_atm_oam_f4_hdr_type atm_oam_f4; + apollo_atm_oam_f5_hdr_type atm_oam_f5; + apollo_atm_aal5_llcsnap_rbe_hdr_type atm_aal5_rbe_snapllc; /* PTA plus */ + apollo_atm_aal5_llcsnap_pppoe_hdr_type atm_aal5_pppoe_snapllc; /* PTA plus */ + apollo_atm_aal5_mux_pppoa_hdr_type atm_aal5_pppoa_mux; /* PTA plus */ + apollo_atm_aal5_llcsnap_pppoa_hdr_type atm_aal5_pppoa_snapllc; /* PTA plus */ + apollo_atm_aal5_mux_mlpoa_hdr_type mlpoa_generic; + apollo_atm_aal5_llcsnap_mlpoa_hdr_type mlpoa_snapllc; + srp_hdr_type srp; +} layer2_t; + +/* + * Define the Common OAM cell format - F4 & F5 cells + * For F4 cells: + * VPI == User VPI + * VCI == (3 == Segment), (4 == End-to-End) + * + * For 
F5 cells: + * VPI == User VPI + * VCI == User VCI + * PT == (100 == Segment, 101 == End-to-End) + * + * OAM Cell Type & Function Type: + * + * OAM_TYPE = (0001 == Fault management) + * OAM_FUNC == (0000 == AIS, 0001 == RDI, 0100 == CC, + * 1000 == loopback) + * + * OAM_TYPE = (0010 == Performance management) + * OAM_FUNC == (0000 == Forward Monitoring(FM), + * 0001 == Backward monitoring(BR), + * 0010 == Monitoring & reporting (FM+BR)) + * + * OAM_TYPE = (1000 == Activation/Deactivation) + * OAM_FUNC == (0000 == Performance Monitoring, + * 0001 == Continuity Check) + * + * OAM_TYPE = (1111 == Sytem Management) + * OAM_FUNC == (0001 == Security - non-real-time, + * 0010 == Security - real-time) + * + */ +#define ATM_OAM_FAULT_MGMT 0x1 /* OAM Fault mgmt. code */ +#define ATM_OAM_PRFRM_MGMT 0x2 /* performance mgmt code */ +#define ATM_OAM_ACT_DEACT 0x8 /* OAM Activation/Deactivation + code */ +#define ATM_OAM_SYSTEM_MGMT 0xF /* System Management code */ + +#define ATM_OAM_AIS_FUNC 0x0 /* AIS function type */ +#define ATM_OAM_RDI_FUNC 0x1 /* RDI function type */ +#define ATM_OAM_CC_FUNC 0x4 /* OAM CC FM function code */ +#define ATM_OAM_LOOP_FUNC 0x8 /* Loopback function type */ + +#define ATM_OAM_F5_SEGMENT 0x4 /* Segment function */ +#define ATM_OAM_F5_ENDTOEND 0x5 /* End-to-End function */ +#define ATM_OAM_F4_SEGMENT 0x3 /* Segment function */ +#define ATM_OAM_F4_ENDTOEND 0x4 /* End-to-End function */ +#define ATM_OAM_F4_PTI_ZERO 0x0 /* PTI=0 for F4 OAM */ + +typedef struct atm_oam_hdr_t_ { + unsigned oam_gfc:4; /* GFC */ + unsigned oam_vpi:8; /* VPI */ + unsigned oam_vci_ms:4; /* VCI (Most Significant Bits) */ + + unsigned oam_vci_ls:12; /* VCI (Least Significant Bits) */ + unsigned oam_pt:3; /* Payload Type */ + unsigned oam_clp:1; /* Cell Loss Priority */ + u8 data[0]; +} atm_oam_hdr_t; + +typedef struct atm_oam_type_func_t_ { + u8 oam_type:4; + u8 oam_func:4; + u8 data[0]; +} atm_oam_type_func_t; + +/* +** IP Version 4 header +*/ + +/* + * version_hdr_len_words [7-4] IP Header Version + * [3-0] IP Header Length in 32-bit words + * tos Type of Service + * total_len_bytes Total IP datagram length in bytes + * (including IP header) + * identification Unique fragmentation identifier + * frag_flags_offset [15-13] Fragmentation flags + * [12-0] Fragmentation Offset + * ttl Time To Live + * protocol_id Protocol Identifier + * checksum 16-bit 1's complement IP Header checksum + * src_addr IP Source Address + * dest_addr IP Destination Address + */ +typedef struct ipv4_header { + u8 version_hdr_len_words; + u8 tos; + u16 total_len_bytes; + u16 identification; + u16 frag_flags_offset; + u8 ttl; + u8 protocol; + u16 checksum; + u32 src_addr; + u32 dest_addr; + u8 data[0]; +} ipv4_header; + +/*OPTIONS PACKET TYPE + * +-+-+-+-+-+-+-+-+ + * |C| CL| OP | + * +-+-+-+-+-+-+-+-+ + */ +typedef struct ipv4_options { + u8 copy :1 ; + u8 op_class :2 ; + u8 option :5 ; + u8 pad ; +}ipv4_options; + +#define LOOSE_SOURCE_ROUTE 131 +#define STRICT_SOURCE_ROUTE 137 +#define IPV4_NO_OPTIONS_HDR_LEN (sizeof(ipv4_header)) +#define IPV4_VERSION 4 +#define IPV4_HEADER_LENGTH_WORDS 5 +#define IPV4_VERSION_HDR_LEN_FIELD ((u8) 0x45) +#define IPV4_HEADER_LENGTH_WORDS 5 +#define IPV4_MIN_HEADER_LENGTH_BYTES 20 +#define IP_HDR_LEN sizeof(ipv4_header) +#define IPV4_VERSION_VALUE_SHIFT 4 + +#define IPV4_FRAG_OFFSET_MASK (0x1fff) +#define IPV4_FRAG_MF_MASK (0x2000) +#define IPV4_FRAG_MF_SHIFT (13) + +/* 0.0.0.0 */ +#define IP_BOOTP_SOURCE_ADDRESS 0 +/* 255.255.255.255 */ +#define IP_LIMITED_BROADCAST_ADDRESS 0xFFFFFFFF + +/* + 
* IPv4 header - version & length fields + */ +#define IP_VER_LEN 0x45 +#define IP_VER 0x4 +#define IP_MIN_LEN 0x5 +#define IP_VER_MASK 0xf0 +#define IP_LEN_MASK 0x0f + +/* + * IPv4 header - TOS field + */ +#define PS_IP_TOS_MASK 0xff +#define IP_PRECEDENCE_SHIFT 5 /* shift value up to precedence bits */ +#define IP_DSCP_SHIFT 2 /* shift value up to dscp bits */ + +#define IP_TOS_PRECEDENCE 0xe0 /* mask of precedence in tos byte */ +#define IP_TOS_NO_PRECEDENCE 0x1f +#define IP_TOS_LOW_DELAY 8 /* values must be shifted 1 bit */ +#define IP_TOS_HIGH_TPUT 4 /* before using */ +#define IP_TOS_HIGH_RELY 2 +#define IP_TOS_LOW_COST 1 +#define IP_TOS_NORMAL 0 +#define IP_TOS_MASK 0x1e /* mask of tos in tos byte */ +#define IP_TOS_MBZ_MASK 0x01 /* mask for MZB bit in tos byte */ +#define IP_TOS_DSCP 0xfc /* mask for dscp in tos byte */ +#define IP_TOS_NO_DSCP 0x03 + +#define IP_TOS_METRIC_TYPES 8 +#define IP_TOS_SHIFT 1 + +#define IP_TOS_PRECEDENCE_MASK (IP_TOS_PRECEDENCE | IP_TOS_MASK) + +/* + * IP TOS Precedence values (High order 3 bits) + */ +#define TOS_PREC_NET_CONTROL 0xe0 +#define TOS_PREC_INET_CONTROL 0xc0 +#define TOS_PREC_CRIT_ECP 0xa0 +#define TOS_PREC_FLASH_OVER 0x80 +#define TOS_PREC_FLASH 0x60 +#define TOS_PREC_IMMEDIATE 0x40 +#define TOS_PREC_PRIORITY 0x20 +#define TOS_PREC_ROUTINE 0x00 +#define TOS_PREC_ILLEGAL 0xff /* invalid precedence value */ + +#define TOS_PREC_NET_CONTROL_NUM 7 +#define TOS_PREC_INET_CONTROL_NUM 6 +#define TOS_PREC_CRIT_ECP_NUM 5 +#define TOS_PREC_FLASH_OVER_NUM 4 +#define TOS_PREC_FLASH_NUM 3 +#define TOS_PREC_IMMEDIATE_NUM 2 +#define TOS_PREC_PRIORITY_NUM 1 +#define TOS_PREC_ROUTINE_NUM 0 + + + +/* + * IPv4 header - flags and fragment offset fields + */ +#define IP_FRAG_OFFSET_MASK 0x1fff + + +#define IP_FRAG_MORE_MASK 0x2000 +#define IP_FRAG_DF_MASK 0x4000 +#define IP_FRAG_UNDEF_MASK 0x8000 +#define IP_FRAG_NO_DF_SET 0x0000 + +/* bit definitions for fragment flags */ +#define IP_FRAG_MORE_BIT 13 +#define IP_FRAG_DF_BIT 14 +#define IP_FRAG_UNDEF_BIT 15 + +/* + * IPv4 header - TTL field + */ +#define TTL_DEFAULT 255 +#define TTL_1 1 +#define TTL_2 2 +#define TTL_255 255 + + +/* + * IPv4 header - protocol field + * + * ICMP_PROT 1 ICMP + * IGMP_PROT 2 group management + * GGP_PROT 3 GGP + * IPINIP_PROT 4 IPv4 in IPv4 encapsulation + * TCP_PROT 6 TCP + * EGP_PROT 8 EGP + * IGRP_PROT 9 IGRP + * UDP_PROT 17 UDP + * HMP_PROT 20 HMP + * RDP_PROT 27 RDP + * IPV6_INIP_PROT 41 IPV6 in IPv4 encapsulation + * RSVP_PROT 46 RSVP + * GRE_PROT 47 GRE + * ESP_PROT 50 ESP + * AHP_PROT 51 AHP + * SDNS0_PROT 53 SNDS + * NHRP_PROT 54 NHRP + * SDNS1_PROT 55 SDNS1 + * HELLO_PROT 63 HELLO + * ND_PROT 77 ND + * EONIP_PROT 80 CLNS over IP + * VINES_PROT 83 Banyan Vines + * NEWIGRP_PROT 88 IGRP + * OSPF_PROT 89 OSPF + * FST_RSRB_PROT 90 RSRB + * FST_DLSW_PROT 91 DLSW + * NOSIP_PROT 94 KA9Q/NOS compatible IP over IP + * PIM_PROT 103 PIMv2 + * PCP_PROT 108 PCP + * PGM_PROT 113 PGM + * MAX_PROT 113 maximum protocol number in the above list, + * used in creating case registry + */ +#define ICMP_PROT 1 +#define IGMP_PROT 2 +#define GGP_PROT 3 +#define IPINIP_PROT 4 +#define TCP_PROT 6 +#define EGP_PROT 8 +#define IGRP_PROT 9 +#define UDP_PROT 17 +#define HMP_PROT 20 +#define RDP_PROT 27 +#define IPV6_INIP_PROT 41 +#define RSVP_PROT 46 +#define GRE_PROT 47 +#define ESP_PROT 50 +#define AHP_PROT 51 +#define SDNS0_PROT 53 +#define NHRP_PROT 54 +#define SDNS1_PROT 55 +#define HELLO_PROT 63 +#define ND_PROT 77 +#define EONIP_PROT 80 +#define VINES_PROT 83 +#define NEWIGRP_PROT 88 +#define 
OSPF_PROT 89 +#define FST_RSRB_PROT 90 +#define FST_DLSW_PROT 91 +#define NOSIP_PROT 94 +#define PIM_PROT 103 +#define PCP_PROT 108 +#define PGM_PROT 113 +#define MAX_PROT 113 + +/*Well Known Application ports */ +#define FTP_PORT 21 /* For control connection */ +/* + * TCP header + */ +typedef struct tcp_hdr_type { + u16 src_port; + u16 dest_port; + u32 seq_num; + u32 ack_num; + u8 hdr_len; + u8 flags; + u16 window_size; + u16 tcp_checksum; + u16 urgent_pointer; + u8 option_data[0]; +} tcp_hdr_type; + +#define TCP_FLAG_FIN 0x01 +#define TCP_FLAG_SYN 0x02 +#define TCP_FLAG_RST 0x04 +#define TCP_FLAG_PUSH 0x08 +#define TCP_FLAG_ACK 0x10 +#define TCP_FLAG_URG 0x20 +#define TCP_FLAG_ECE 0x40 +#define TCP_FLAG_CWR 0x80 + +/* + * TCP Option + */ +typedef struct tcp_option_s { + u8 kind; + u8 length; + u8 data[0]; +} tcp_option_t; + +#define TCP_END_OPTIONS_LIST 0 +#define TCP_OPTION_NOP 1 +#define TCP_OPTION_MSS 2 +#define TCP_OPTION_WINDOW_SCALE 3 +#define TCP_OPTION_SACK_PERMITTED 4 +#define TCP_OPTION_SACK_DATA 5 +#define TCP_OPTION_ECHO 6 +#define TCP_OPTION_ECHO_REPLY 7 +#define TCP_OPTION_TSOPT 8 +/* + 9 2 Partial Order Connection Permitted. RFC 1693 + 10 3 Partial Order Service Profile. RFC 1693 + 11 6 CC, Connection Count. RFC 1644 + 12 6 CC.NEW RFC 1644 + 13 6 CC.ECHO RFC 1644 + 14 3 TCP Alternate Checksum Request. RFC 1146 + 15 Variable. TCP Alternate Checksum Data. RFC 1146 + 16 Skeeter. + 17 Bubba. + 18 3 Trailer Checksum Option. +*/ +#define TCP_OPTION_MD5_SIGNATURE 19 +/* + 20 SCPS Capabilities. + 21 Selective Negative Acknowledgements. + 22 Record Boundaries. + 23 Corruption experienced. + 24 SNAP. + 25 + 26 TCP Compression Filter. +*/ +#define TCP_OPTION_QUICK_START 27 + +#define TCP_OPTION_NUM_MAX 27 + +#ifdef TARGET_CISCO +#define tcp_printf(format_str, params...) //printf(format_str, ## params) +#else +#define tcp_printf(format_str, params...) 
printf(format_str, ## params) +#endif + +typedef struct udp_hdr_type { + u16 src_port; + u16 dest_port; + u16 udp_length; + u16 udp_checksum; +} udp_hdr_type_t; + +#define TYPE_IPV6 0x86dd +#define TYPE_IPV4 0x0800 + +/* + * version_trafficclass_flowlabel [31:28] IP Header Version, + [27:20] traffic_class, + [19:0] flow_label[20] + * payload_length Length of packet in bytes excluding header size(s) + * next_header Identifies the type of header following the IPv6 header + * hop_limit Decremented by 1 by each forwarding node, packet discarded when zero + * src_addr IPv6 Source Address + * dst_addr IPv6 Destination Address + */ +typedef struct ipv6_header { + u32 version_trafficclass_flowlabel; + u16 payload_length; + u8 next_header; + u8 hop_limit; + u32 src_addr[4]; + u32 dst_addr[4]; + u8 data[0]; +} ipv6_header_t; + +#define IPV6_HDR_LEN 40 +#define IPV6_HDR_LEN_WORDS 10 +#define IPV6_FLABLE_MASK 0x000FFFFF +#define IPV6_MIN_PATH_MTU (1280) + +#define IPV6_GET_IP_VER(ih) ((clib_net_to_host_u32((ih) \ + ->version_trafficclass_flowlabel) >> 28) & 0xf) +#define IPV6_GET_TOS(ih) ((clib_net_to_host_u32((ih) \ + ->version_trafficclass_flowlabel) >> 20) & 0xff) +#define IPV6_GET_FLOW_LABEL(ih) ((clib_net_to_host_u32((ih) \ + ->version_trafficclass_flowlabel)) & 0xfffff) + +#define IPV6_VERSION_VALUE (6) +#define IPV6_VERSION_VALUE_SHIFT (28) +#define IPV6_TRAFFIC_CLASS_VALUE_SHIFT (20) +#define IPV6_TRAFFIC_CLASS_VALUE_MASK (0xff) + +#define IPV6_PROTO_HOPOPTS 0 +#define IPV6_PROTO_TCP 6 +#define IPV6_PROTO_UDP 17 +#define IPV6_PROTO_IPV6 41 +#define IPV6_PROTO_ROUTING 43 +#define IPV6_PROTO_FRAGMENT 44 +#define IPV6_PROTO_DESTOPTS 60 +#define IPV6_PROTO_ESP 50 +#define IPV6_PROTO_AH 51 +#define IPV6_PROTO_ICMPV6 58 +#define IPV6_PROTO_NONE 59 + +/* standard v6 extension header are 2 tytes + * one byte next header + * one byte header length + */ + +typedef struct ipv6_frag_header { + u8 next_header; + u8 reserved; + u16 frag_offset_res_m; + u32 identification; +} ipv6_frag_header_t; + +#define IPV6_FRAG_HDR_LEN (sizeof(ipv6_frag_header_t)) + +#define IPV6_FRAG_OFFSET_MASK (0xFFF8) +#define IPV6_FRAG_OFFSET_SHIFT (3) +#define IPV6_FRAG_MORE_FRAG_MASK (0x0001) + +#define IPV6_TOS_SHIFT 20 +#define IPV6_TOS_SHIFT_HLF_WD 4 +#define IPV6_NEXT_HDR_SHIFT 8 + +typedef struct ipv6_routing_header { + u8 next_header; + u8 hdr_ext_len; + u8 routing_type; + u8 segments_left; + u8 data[0]; +} ipv6_routing_header_t; +#define IPV6_ROUTING_HDR_LEN (sizeof(ipv6_routing_header_t)) + +typedef struct ipv6_hop_header { + u8 next_header; + u8 hdr_ext_len; + u8 options[0]; +} ipv6_hop_header_t; +#define IPV6_HOP_LEN (sizeof(ipv6_hop_header_t)) + +typedef struct ipv6_dest_opt_header { + u8 next_header; + u8 hdr_ext_len; + u8 options[0]; +} ipv6_dest_opt_header_t; +#define IPV6_DESTOPT_LEN (sizeof(ipv6_dest_opt_header_t)) + + +/* Definition of ICMP header */ +typedef struct icmp_v4_s { + u8 type; + u8 code; + u16 checksum; + u16 identifier; + u16 sequence; +} icmp_v4_t; + +#define ICMPV4_HDR_SIZE (sizeof(icmp_v4_t)) +#define ICMPV4_ECHOREPLY 0 /* Type: echo reply */ +#define ICMPV4_ECHO 8 /* Type: echo request */ + +#define ICMPV4_UNREACHABLE 3 /* Type: destination unreachable */ +#define ICMPV4_UNRNET 0 /* Code: Net unreachable */ +#define ICMPV4_UNRHOST 1 /* Code: host unreachable */ +#define ICMPV4_UNRPROT 2 /* Code: protocol unreachable */ +#define ICMPV4_UNRPORT 3 /* Code: port unreachable */ +#define ICMPV4_UNRFRAG 4 /* Code: frag req DF set */ +#define ICMPV4_UNRADMIN 13 /* Code: administratively prohib. 
*/ +#define ICMPV4_SOURCEROUTE_FAILED 5 /* Code: administratively prohib. */ + +#define ICMPV4_SRC_ROUTE_FAIL 5 /* Code: Source Route Failed */ +#define ICMPV4_NO_ROUTE_DESTN_8 8 /* Code: No Route to Destn */ +#define ICMPV4_NO_ROUTE_DESTN_11 11 /* Code: No Route to Destn */ +#define ICMPV4_NO_ROUTE_DESTN_12 12 /* Code: No Route to Destn */ + +#define ICMPV4_ADMIN_PROH_9 9 /* Code: Administratively Prohibited */ +#define ICMPV4_ADMIN_PROH_10 10 /* Code: Administratively Prohibited */ +#define ICMPV4_PREC_CUTOFF 15 /* Code: Precedence Cutoff */ + + +#define ICMPV4_TIMEEXCEEDED 11 /* Type: time exceeded */ +#define ICMPV4_TIMTTL 0 /* Code: ttl in transit code */ + +#define ICMPV4_PARAMETER_PROBLEM 12 /* Type: Parameter Problem */ +#define ICMPV4_PARAM_ERROR 0 /* Code: Pointer to Error */ +#define ICMPV4_MISSING_OPTION_CODE 1 /* Code: Mission option */ +#define ICMPV4_PARAM_BAD_LEN 2 /* Code: Bad Length */ + +#define ICMPV4_CONVERSION_ERROR 31 +#define ICMPV4_SOURCE_QUENCH 4 +#define ICMPV4_REDIRECT 5 +#define ICMPV4_TIMESTAMP 13 +#define ICMPV4_TIMESTAMP_REPLY 14 +#define ICMPV4_INFO_REQUEST 15 +#define ICMPV4_INFO_REPLY 16 +#define ICMPV4_ADDR_MASK_REQUEST 17 +#define ICMPV4_ADDR_MASK_REPLY 18 + +typedef struct icmp_v6_s { + + u8 type; + u8 code; + u16 checksum; + + u32 data[0]; +} icmp_v6_t; + +typedef struct pseudo_v6_header { + u32 src_addr[4]; + u32 dst_addr[4]; + u16 payload_length; + u16 next_header; +} pseudo_v6_header_t; + + +#define ICMPV6_ECHO 128 +#define ICMPV6_ECHO_REPLY 129 +#define ICMPV6_PKT_TOO_BIG 2 +#define ICMPV6_TIMEEXCEEDED 3 +#define ICMPV6_TIMTTL 0 +#define ICMPV6_PARAMETER_PROBLEM 4 +#define ICMPV6_UNREACHABLE 1 +#define ICMPV6_NEIGHBOR_SOLICITAION 135 +#define ICMPV6_NEIGHBOR_ADVT 136 +/* ICMP V6 generated packet size */ +#define ICMPV6_ERR_SIZE 48 +#define ICMPV6_HDR_SIZE (sizeof(icmp_v6_t) +sizeof(u32)) + +/* Code for Type 1 */ +#define ICMPV6_UNRDESTN 0 /* Code: No route to Desnt */ +#define ICMPV6_ADM_PROH 1 /* Code: Adminitrative Prohibited */ +#define ICMPV6_SRC_ADD_SCOPE 2 /* Code: Source Address beyond scope */ +#define ICMPV6_UNRHOST 3 /* Code: Host Unreachable */ +#define ICMPV6_UNRPORT 4 /* Code: Port UnReachable */ + +#define ICMPV6_UNRPROT 1 /* type 4 - Code: No route to Desnt */ + +#define ICMPV6_PTB_CODE 0 /* Code: For PTB */ +#define ICMPV6_PARAM_CODE 0 /* Code: For Parameter Problem */ +#define ICMPV6_UNREC_HDR 1 /* Code: For Parameter Problem */ +#define ICMPV6_SRC_ADD_FAIL 5 /* Code: For Source address failed */ +#define ICMP_ECHO_REPLY_CODE 0 +#define DEFAULT_TTL_HOPLIMIT_VAL 64 + +typedef struct pptp_hdr_type { + + u16 flags_ver; + u16 proto_type; /* PPP = 0x880B */ + u16 payload_len; + u16 call_id; + u32 seq_no; + u32 ack_no; + +} pptp_hdr_type_t; + +/* + * NAME + * + * tcp_findoption + * + * SYNOPSIS + * u8* tcp_findoption (tcp_hdr_t *tcp, uchar option) + * + * PARAMETERS + * tcp - pointer to TCP header + * option - TCP option + * + * RETURNS + * This function returns a pointer to the option found, + * otherwise returns null. + * + * + * DESCRIPTION + * This function searches the option and returns a pointer to the + * matched option field containing option kind/length/data sub-fields. 
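+ *
+ * EXAMPLE
+ *      Illustrative only -- look up the MSS option in a parsed TCP
+ *      header.  The returned pointer addresses the option's kind byte,
+ *      so opt[0] = kind, opt[1] = length, opt[2]... = option data:
+ *
+ *          u8 *opt = tcp_findoption (tcp, TCP_OPTION_MSS);
+ *          if (opt && opt[1] == 4) {
+ *              u16 mss = (opt[2] << 8) | opt[3];
+ *          }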
+ * + */ +static inline u8* tcp_findoption (tcp_hdr_type *tcp, u8 option) +{ + u8*data; + u8 len, optlen; + + data = tcp->option_data; + len = ((tcp->hdr_len>>4) << 2) - sizeof(tcp_hdr_type); + +#define MAXTCPOPTIONBYTES 40 +#define MINTCPOPTIONLENGTH 2 + + while (len) { + if (PREDICT_TRUE(option == data[0])) { + return (data); + } else { + switch (data[0]) { + case TCP_END_OPTIONS_LIST: + return (NULL); + case TCP_OPTION_NOP: + len -= 1; + data += 1; + break; + default: + /* Sanity check the length. */ + optlen = data[1]; + if ((optlen < MINTCPOPTIONLENGTH) || + (optlen > MAXTCPOPTIONBYTES) || + (optlen > len)) { + return (NULL); + } + len -= optlen; + data += optlen; + break; + } + } + } + + return (NULL); +} + + +static inline u32 crc_calc (ipv4_header *ipv4) +{ + u16 *ipv4_word_ptr = (u16 *) ipv4; + u32 crc32; + /* + * Add all fields except the checksum field + */ + crc32 = (u32)clib_net_to_host_u16(*ipv4_word_ptr) + + (u32)clib_net_to_host_u16(*(ipv4_word_ptr + 1)) + + (u32)clib_net_to_host_u16(*(ipv4_word_ptr + 2)) + + (u32)clib_net_to_host_u16(*(ipv4_word_ptr + 3)) + + (u32)clib_net_to_host_u16(*(ipv4_word_ptr + 4)) + + (u32)clib_net_to_host_u16(*(ipv4_word_ptr + 6)) + + (u32)clib_net_to_host_u16(*(ipv4_word_ptr + 7)) + + (u32)clib_net_to_host_u16(*(ipv4_word_ptr + 8)) + + (u32)clib_net_to_host_u16(*(ipv4_word_ptr + 9)); + + /* Add in the carry of the original sum */ + crc32 = (crc32 & 0xFFFF) + (crc32 >> 16); + /* Add in the carry of the final sum */ + crc32 = (crc32 & 0xFFFF) + (crc32 >> 16); + + return crc32; +} + +#endif /* __TCP_HEADER_DEFINITIONS_H__ */ diff --git a/vnet/vnet/vcgn/vcgn_classify.c b/vnet/vnet/vcgn/vcgn_classify.c new file mode 100644 index 00000000000..518f9102317 --- /dev/null +++ b/vnet/vnet/vcgn/vcgn_classify.c @@ -0,0 +1,1419 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vppinfra/pool.h> + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +#include "cnat_db.h" +#include "cnat_global.h" +#include "cnat_cli.h" +#include "cnat_config.h" +#include "cnat_logging.h" +#include "cnat_config_api.h" +#include "cnat_show_api.h" +#include "cnat_show_response.h" +#include "cnat_ipv4_udp.h" +#include "cnat_common_api.h" + +#include <arpa/inet.h> + +typedef struct { + u32 cached_next_index; + + /* inside, outside interface handles */ + u32 inside_sw_if_index; + u32 outside_sw_if_index; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} vcgn_classify_main_t; + +typedef struct { + /* $$$$ fill in with per-pkt trace data */ + u32 next_index; + u32 sw_if_index; + u32 orig_dst_address; + u16 orig_dst_port; +} vcgn_classify_trace_t; + +#define FIND_MY_VRF_USING_I_VRF_ID \ + my_vrfmap_found = 0; \ + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ \ + if (my_vrfmap->i_vrf_id == i_vrf_id) { \ + my_vrfmap_found = 1; \ + my_vrfmap_temp = my_vrfmap; \ + break; \ + } \ + })); + + +/* packet trace format function */ +static u8 * format_swap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + vcgn_classify_trace_t * t = va_arg (*args, vcgn_classify_trace_t *); + + s = format (s, "VCGN_CLASSIFY: dst %U dst_port %d sw_if_index %d next %d", + format_ip4_address, (ip4_header_t *) &t->orig_dst_address, + clib_net_to_host_u16(t->orig_dst_port), + t->sw_if_index, t->next_index); + return s; +} + +vcgn_classify_main_t vcgn_classify_main; + +vlib_node_registration_t vcgn_classify_node; + +#define foreach_vcgn_classify_error \ +_(PACKETS_RECEIVED, "total packets received") \ +_(V4_PACKETS_PROCESSED, "ipv4 packets processed for vCGN") \ +_(V4_PACKETS_PUNTED, "ipv4 packets punted") \ +_(V6_PACKETS_PUNTED, "ipv6 packets punted") \ +_(MPLS_PACKETS_PUNTED, "mpls unicast packets punted") \ +_(ETH_PACKETS_PUNTED, "ethernet packets punted") + + +typedef enum { +#define _(sym,str) VCGN_CLASSIFY_ERROR_##sym, + foreach_vcgn_classify_error +#undef _ + VCGN_CLASSIFY_N_ERROR, +} vcgn_classify_error_t; + +static char * vcgn_classify_error_strings[] = { +#define _(sym,string) string, + foreach_vcgn_classify_error +#undef _ +}; + +/* + * To drop a pkt and increment one of the previous counters: + * + * set b0->error = error_node->errors[VCGN_CLASSIFY_ERROR_EXAMPLE]; + * set next0 to a disposition index bound to "error-drop". 
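+ * (VCGN_CLASSIFY_ERROR_EXAMPLE above is a placeholder name; substitute
+ * one of the counters generated from foreach_vcgn_classify_error, e.g.
+ * VCGN_CLASSIFY_ERROR_V4_PACKETS_PUNTED.)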
+ * + * To manually increment the specific counter VCGN_CLASSIFY_ERROR_EXAMPLE: + * + * vlib_node_t *n = vlib_get_node (vm, vcgn_classify.index); + * u32 node_counter_base_index = n->error_heap_index; + * vlib_error_main_t * em = &vm->error_main; + * em->counters[node_counter_base_index + VCGN_CLASSIFY_ERROR_EXAMPLE] += 1; + * + */ + +typedef enum { + VCGN_CLASSIFY_NEXT_IP4_INPUT, + VCGN_CLASSIFY_NEXT_IP6_INPUT, + VCGN_CLASSIFY_NEXT_MPLS_INPUT, + VCGN_CLASSIFY_NEXT_ETHERNET_INPUT, + VCGN_CLASSIFY_NEXT_UDP_INSIDE, + VCGN_CLASSIFY_NEXT_UDP_OUTSIDE, + VCGN_CLASSIFY_NEXT_TCP_INSIDE, + VCGN_CLASSIFY_NEXT_TCP_OUTSIDE, + VCGN_CLASSIFY_NEXT_ICMP_Q_INSIDE, + VCGN_CLASSIFY_NEXT_ICMP_Q_OUTSIDE, + VCGN_CLASSIFY_NEXT_ICMP_E_INSIDE, + VCGN_CLASSIFY_NEXT_ICMP_E_OUTSIDE, + VCGN_CLASSIFY_N_NEXT, +} vcgn_classify_next_t; + +static uword +vcgn_classify_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + vcgn_classify_next_t next_index; + vcgn_classify_main_t * vcm = &vcgn_classify_main; + vlib_node_t *n = vlib_get_node (vm, vcgn_classify_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + u16 *l3_type; + int counter; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + #if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = vcm->cached_next_index; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + next1 = vcm->cached_next_index; + + /* $$$$ your message in this space. 
Process 2 x pkts */ + em->counters[node_counter_base_index + VCGN_CLASSIFY_ERROR_PACKETS_RECEIVED] += 2; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + vcgn_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + vcgn_classify_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + #endif /* if 0 */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t * h0; + //ipv4_header *h0; + ethernet_header_t *eth0; + icmp_v4_t *icmp; + u8 icmp_type; + u8 ipv4_hdr_len; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + eth0 = (ethernet_header_t *) vlib_buffer_get_current(b0); + u16 *etype = ð0->type; + + /* vlan tag 0x8100 */ + if (*etype == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) { + l3_type = (etype + 1); /* Skip 2 bytes of vlan id */ + vlib_buffer_advance(b0, 18); + } else { + l3_type = etype; + vlib_buffer_advance(b0, 14); + } + /* Handling v4 pkts 0x800 */ + if (*l3_type == clib_host_to_net_u16(ETHERNET_TYPE_IP4)) { + + h0 = vlib_buffer_get_current (b0); + + u8 protocol_type = h0->protocol; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = VCGN_CLASSIFY_NEXT_IP4_INPUT; + counter = VCGN_CLASSIFY_ERROR_V4_PACKETS_PROCESSED; + + if (protocol_type == 0x11) { /* UDP# 17 */ + next0 = (sw_if_index0 == vcm->inside_sw_if_index) ? + VCGN_CLASSIFY_NEXT_UDP_INSIDE : next0; + + next0 = (sw_if_index0 == vcm->outside_sw_if_index) ? + VCGN_CLASSIFY_NEXT_UDP_OUTSIDE : next0; + } else if (protocol_type == 0x06) { /* TCP# 6 */ + next0 = (sw_if_index0 == vcm->inside_sw_if_index) ? + VCGN_CLASSIFY_NEXT_TCP_INSIDE : next0; + + next0 = (sw_if_index0 == vcm->outside_sw_if_index) ? + VCGN_CLASSIFY_NEXT_TCP_OUTSIDE : next0; + } else if (protocol_type == 0x01) { /* ICMP # 1 */ + + ipv4_hdr_len = (h0->ip_version_and_header_length & 0xf) << 2; + icmp = (icmp_v4_t *)((u8*)h0 + ipv4_hdr_len); + icmp_type = icmp->type; + + if ((icmp_type == ICMPV4_ECHO) || + (icmp_type == ICMPV4_ECHOREPLY)) { + next0 = (sw_if_index0 == vcm->inside_sw_if_index) ? + VCGN_CLASSIFY_NEXT_ICMP_Q_INSIDE : next0; + + next0 = (sw_if_index0 == vcm->outside_sw_if_index) ? + VCGN_CLASSIFY_NEXT_ICMP_Q_OUTSIDE : next0; + } else { + next0 = (sw_if_index0 == vcm->inside_sw_if_index) ? + VCGN_CLASSIFY_NEXT_ICMP_E_INSIDE : next0; + + next0 = (sw_if_index0 == vcm->outside_sw_if_index) ? 
+ VCGN_CLASSIFY_NEXT_ICMP_E_OUTSIDE : next0; + + } + } else { + /* cannot do NATting with this L4 protocol */ + counter = VCGN_CLASSIFY_ERROR_V4_PACKETS_PUNTED; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + udp_header_t * u0 = (udp_header_t *)(h0+1); + vcgn_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->orig_dst_address = h0->dst_address.as_u32; + t->orig_dst_port = u0->dst_port; + } + + } else if (*l3_type == clib_host_to_net_u16(ETHERNET_TYPE_IP6)) { + + /* IPv6 0x86DD */ + next0 = VCGN_CLASSIFY_NEXT_IP6_INPUT; + counter = VCGN_CLASSIFY_ERROR_V6_PACKETS_PUNTED; + + } else if (*l3_type == + clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) { + + /* MPLS unicast 0x8847 */ + next0 = VCGN_CLASSIFY_NEXT_MPLS_INPUT; + counter = VCGN_CLASSIFY_ERROR_MPLS_PACKETS_PUNTED; + } else { /* Remaining all should be pushed to "ethernet-input" */ + + next0 = VCGN_CLASSIFY_NEXT_ETHERNET_INPUT; + counter = VCGN_CLASSIFY_ERROR_ETH_PACKETS_PUNTED; + } + + em->counters[node_counter_base_index + counter] += 1; + em->counters[node_counter_base_index + + VCGN_CLASSIFY_ERROR_PACKETS_RECEIVED] += 1; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (vcgn_classify_node) = { + .function = vcgn_classify_node_fn, + .name = "vcgn-classify", + .vector_size = sizeof (u32), + .format_trace = format_swap_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(vcgn_classify_error_strings), + .error_strings = vcgn_classify_error_strings, + + .n_next_nodes = VCGN_CLASSIFY_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [VCGN_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input", + [VCGN_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input", + [VCGN_CLASSIFY_NEXT_MPLS_INPUT] = "mpls-gre-input", + [VCGN_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input", + [VCGN_CLASSIFY_NEXT_UDP_INSIDE] = "vcgn-v4-udp-i2o", + [VCGN_CLASSIFY_NEXT_UDP_OUTSIDE] = "vcgn-v4-udp-o2i", + [VCGN_CLASSIFY_NEXT_TCP_INSIDE] = "vcgn-v4-tcp-i2o", + [VCGN_CLASSIFY_NEXT_TCP_OUTSIDE] = "vcgn-v4-tcp-o2i", + [VCGN_CLASSIFY_NEXT_ICMP_Q_INSIDE] = "vcgn-v4-icmp-q-i2o", + [VCGN_CLASSIFY_NEXT_ICMP_Q_OUTSIDE] = "vcgn-v4-icmp-q-o2i", + [VCGN_CLASSIFY_NEXT_ICMP_E_INSIDE] = "vcgn-v4-icmp-e-i2o", + [VCGN_CLASSIFY_NEXT_ICMP_E_OUTSIDE] = "vcgn-v4-icmp-e-o2i" + }, +}; + + +/* A test function to init the vrf map */ + +clib_error_t *vcgn_classify_init (vlib_main_t *vm) +{ + vcgn_classify_main_t * mp = &vcgn_classify_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + mp->inside_sw_if_index = 1; + mp->outside_sw_if_index = 0; + + dpdk_set_next_node (DPDK_RX_NEXT_IP4_INPUT, "vcgn-classify"); + + { + pg_node_t * pn; + pn = pg_get_node (vcgn_classify_node.index); + pn->unformat_edit = unformat_pg_ip4_header; + } + return 0; +} + +VLIB_INIT_FUNCTION (vcgn_classify_init); + +/* Show command handlers */ +static clib_error_t * +show_vcgn_stats_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + if (cnat_db_init_done) { + cnat_nat44_handle_show_stats(vm); + } else { + vlib_cli_output(vm, "vCGN is not configured !!\n"); + } + return 0; +} + + +static clib_error_t * +show_vcgn_config_command_fn (vlib_main_t * vm, + unformat_input_t * input, + 
vlib_cli_command_t * cmd) +{ + cnat_nat44_handle_show_config(vm); + return 0; +} + +static clib_error_t * +show_vcgn_inside_translation_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + /* + vnet_main_t * vnm = vnet_get_main(); + */ + vcgn_classify_main_t * vcm = &vcgn_classify_main; + spp_api_cnat_v4_show_inside_entry_req_t inside_req; + u8 *proto; + ip4_address_t inside_addr; + u32 start_port = 1; + u32 end_port = 65535; + + inside_req.start_port = start_port; + inside_req.end_port = end_port; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "protocol %s", &proto)) { + if (!strncmp((char *) proto, "udp", 3)) { + inside_req.protocol = 1; + } else if (!strncmp((char *) proto, "tcp", 3)) { + inside_req.protocol = 2; + } else { + inside_req.protocol = 3; + } + } else if (unformat (input, "inside-addr %U", + unformat_ip4_address, &inside_addr)) { + inside_req.ipv4_addr = clib_net_to_host_u32(inside_addr.as_u32); + } else if (unformat(input, "start-port %u", &start_port)) { + inside_req.start_port = start_port; + } else if (unformat(input, "end-port %u", &end_port)) { + inside_req.end_port = end_port; + } else { break;} + } + inside_req.vrf_id = vcm->inside_sw_if_index; + inside_req.flags |= CNAT_TRANSLATION_ENTRY_DYNAMIC; /* as of now only dynamic */ + inside_req.all_entries = 0; /* we can see it later */ +#if DEBUG + vlib_cli_output(vm, "proto %d, inside-addr 0x%x, start_port %u, " + "end_port %u, vrf 0x%x\n", + inside_req.protocol, + inside_req.ipv4_addr, + inside_req.start_port, + inside_req.end_port, + vcm->inside_sw_if_index); +#endif + if (cnat_db_init_done) { + cnat_v4_show_inside_entry_req_t_handler(&inside_req, vm); + } else { + vlib_cli_output(vm, "vCGN is not configured !!\n"); + } + return 0; +} + + +static clib_error_t * +show_vcgn_outside_translation_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + void cnat_v4_show_outside_entry_req_t_handler + (spp_api_cnat_v4_show_outside_entry_req_t *mp, vlib_main_t *vm); + vcgn_classify_main_t * vcm = &vcgn_classify_main; + spp_api_cnat_v4_show_outside_entry_req_t outside_req; + u8 *proto; + ip4_address_t outside_addr; + u32 start_port = 1; + u32 end_port = 65535; + + outside_req.start_port = start_port; + outside_req.end_port = end_port; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "protocol %s", &proto)) { + if (!strncmp((char *) proto, "udp", 3)) { + outside_req.protocol = 1; + } else if (!strncmp((char *) proto, "tcp", 3)) { + outside_req.protocol = 2; + } else { + outside_req.protocol = 3; + } + } else if (unformat (input, "outside-addr %U", + unformat_ip4_address, &outside_addr)) { + outside_req.ipv4_addr = clib_net_to_host_u32(outside_addr.as_u32); + } else if (unformat(input, "start-port %u", &start_port)) { + outside_req.start_port = start_port; + } else if (unformat(input, "end-port %u", &end_port)) { + outside_req.end_port = end_port; + } else { break;} + } + outside_req.vrf_id = vcm->outside_sw_if_index; + outside_req.flags |= CNAT_TRANSLATION_ENTRY_DYNAMIC; /* as of now only dynamic */ +#if DEBUG + vlib_cli_output(vm, "proto %d, outside-addr 0x%x, start_port %u, " + "end_port %u, vrf 0x%x\n", + outside_req.protocol, + outside_req.ipv4_addr, + outside_req.start_port, + outside_req.end_port, + vcm->outside_sw_if_index); +#endif + if (cnat_db_init_done) { + cnat_v4_show_outside_entry_req_t_handler(&outside_req, vm); + } else { + vlib_cli_output(vm, "vCGN is not 
configured !!\n"); + } + return 0; +} + + +/* Config command handlers */ +static clib_error_t * +set_vcgn_inside_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + vcgn_classify_main_t * vcm = &vcgn_classify_main; + u32 inside_sw_if_index = 1; + u32 outside_sw_if_index = ~0; + void cnat_db_v2_init (void ); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "%U", + unformat_vnet_sw_interface, vnm, &inside_sw_if_index)) + ; + else if (unformat(input, "outside %U", + unformat_vnet_sw_interface, vnm, &outside_sw_if_index)) + ; + else break; + } + if (inside_sw_if_index == ~0 || + outside_sw_if_index == ~0) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + if (inside_sw_if_index == outside_sw_if_index) + return clib_error_return (0, "inside and outside interfaces can't be the same..."); + + vcm->inside_sw_if_index = inside_sw_if_index; + vcm->outside_sw_if_index = outside_sw_if_index; + + cnat_db_v2_init(); + + /* Turn on the db scanner process */ + cnat_scanner_db_process_turn_on(vm); + return 0; +} + +static clib_error_t * +set_vcgn_map_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vcgn_classify_main_t * vcm = &vcgn_classify_main; + ip4_address_t lo, hi; + spp_api_cnat_v4_add_vrf_map_t map; + int i; + + vnet_hw_interface_t *inside_hw_if_index = NULL; + vnet_hw_interface_t *outside_hw_if_index = NULL; + + if (!unformat (input, "%U", unformat_ip4_address, &lo)) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + if (unformat (input, "- %U", unformat_ip4_address, &hi)) + ; + + /* $$$$ remember to set i_vrf, i_vrf_id as needed */ + + /* Fill the structure spp_api_cnat_v4_add_vrf_map_t & let this API handle it */ + /* i_vrf_id & o_vrf_id are 32-bit & i_vrf, o_vrf are 16 bit */ + map.i_vrf_id = vcm->inside_sw_if_index; + map.o_vrf_id = vcm->outside_sw_if_index; + map.i_vrf = vcm->inside_sw_if_index; + map.o_vrf = vcm->outside_sw_if_index; + + map.start_addr[0] = clib_net_to_host_u32(lo.as_u32); + map.end_addr[0] = clib_net_to_host_u32(hi.as_u32); + + for (i = 0; i < CNAT_MAX_VRFMAP_ENTRIES; i++) { + vrf_map_array[i] = VRF_MAP_ENTRY_EMPTY; + } + cnat_nat44_add_vrf_map_t_handler(&map, vm); + +#if 1 + inside_hw_if_index = vnet_get_sup_hw_interface(vcm->vnet_main, vcm->inside_sw_if_index); + if (inside_hw_if_index) { + vnet_hw_interface_rx_redirect_to_node(vcm->vnet_main, + inside_hw_if_index->hw_if_index, vcgn_classify_node.index); + } + outside_hw_if_index = vnet_get_sup_hw_interface(vcm->vnet_main, vcm->outside_sw_if_index); + if (outside_hw_if_index) { + vnet_hw_interface_rx_redirect_to_node(vcm->vnet_main, + outside_hw_if_index->hw_if_index, vcgn_classify_node.index); + } +#endif + return 0; +} + +static clib_error_t * +set_vcgn_tcp_timeout_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + /* + vnet_main_t * vnm = vnet_get_main(); + vcgn_classify_main_t * vcm = &vcgn_classify_main; + */ + u32 act_timeout = 0; + u32 init_timeout = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "active %u", &act_timeout)) + tcp_active_timeout = act_timeout; + else if (unformat(input, "init %u", &init_timeout)) + tcp_initial_setup_timeout = init_timeout; + else break; + } + return 0; +} + +static clib_error_t * +set_vcgn_udp_timeout_command_fn (vlib_main_t * vm, + unformat_input_t * input, + 
vlib_cli_command_t * cmd) +{ + /* + vnet_main_t * vnm = vnet_get_main(); + vcgn_classify_main_t * vcm = &vcgn_classify_main; + */ + u32 act_timeout = 0; + u32 init_timeout = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "active %u", &act_timeout)) + udp_act_session_timeout = act_timeout; + else if (unformat(input, "init %u", &init_timeout)) + udp_init_session_timeout = init_timeout; + else break; + } + return 0; +} + + +static clib_error_t * +set_vcgn_icmp_timeout_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + /* + * vnet_main_t * vnm = vnet_get_main(); + * vcgn_classify_main_t * vcm = &vcgn_classify_main; + */ + u32 timeout = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "%u", &timeout)) + ; + else break; + } + icmp_session_timeout = timeout; + return 0; +} + + +static clib_error_t * +set_vcgn_protocol_default_timeout_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + /* + vnet_main_t * vnm = vnet_get_main(); + vcgn_classify_main_t * vcm = &vcgn_classify_main; + */ + u8 *protocol; + u8 reset = 1; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "%s", &protocol)) + ; + else break; + } + cnat_nat44_set_protocol_timeout_value(0, 0, protocol, reset, vm); + return 0; +} + +static clib_error_t * +set_vcgn_dynamic_port_start_range_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + /* + vnet_main_t * vnm = vnet_get_main(); + vcgn_classify_main_t * vcm = &vcgn_classify_main; + */ + u32 port = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "%u", &port)) + ; + else break; + } + if (port != 0 && port > 65535) { + vlib_cli_output(vm, "Error !! Invalid port\n"); + } else { + cnat_static_port_range = port; + vlib_cli_output(vm, "Dynamic Port Range Config Successful !!\n"); + } + return 0; +} + +static clib_error_t * +set_vcgn_port_limit_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + /* + vnet_main_t * vnm = vnet_get_main(); + vcgn_classify_main_t * vcm = &vcgn_classify_main; + */ + u32 port = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat(input, "%u", &port)) + ; + else break; + } + if (port != 0 && port > 65535) { + vlib_cli_output(vm, "Error !! Invalid port\n"); + } else { + cnat_main_db_max_ports_per_user = port; + vlib_cli_output(vm, "Port Limit Config Successful !!\n"); + } + return 0; +} + +static inline void nfv9_init_pkt_sent_data(cnat_nfv9_logging_info_t *nfv9_info) +{ + nfv9_server_info_t *server = nfv9_server_info_pool + + nfv9_info->server_index; + + /* + * Reset the pkts_since_last_template and sent_time + * so that template will be sent next time + */ + server->last_template_sent_time = 0; + server->pkts_since_last_template = 0xffffffff; +} + +static inline u16 nfv9_get_max_length_minus_max_record_size(u16 path_mtu) +{ + u16 max_length_minus_max_record_size; + if(!path_mtu) /* Use default */ + path_mtu = NFV9_DEF_PATH_MTU; + + max_length_minus_max_record_size = path_mtu - + CNAT_NFV9_DATAFLOW_RECORD_HEADER_LENGTH - + NFV9_PAD_VALUE - + CNAT_NFV9_MAX_SINGLE_RECORD_LENGTH; /* Note.. as of now this record + * requires max number of bytes. 
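/*
 * Illustrative sketch (not part of the original diff):
 * nfv9_get_max_length_minus_max_record_size() computes how much of one
 * NetFlow v9 export packet remains once the data-flowset record header,
 * padding, and the largest single record are subtracted from the path MTU:
 *
 *   usable = path_mtu - CNAT_NFV9_DATAFLOW_RECORD_HEADER_LENGTH
 *                     - NFV9_PAD_VALUE
 *                     - CNAT_NFV9_MAX_SINGLE_RECORD_LENGTH
 *
 * The constants' values are not visible in this hunk; the numbers below are
 * made-up placeholders used purely to show the shape of the calculation.
 */
#include <stdio.h>

int
main (void)
{
  unsigned path_mtu = 1500;                       /* default used when 0 is configured */
  unsigned hdr = 20, pad = 8, max_rec = 64;       /* hypothetical constant values */
  unsigned usable = path_mtu - hdr - pad - max_rec;

  printf ("usable bytes per export packet: %u\n", usable);
  return 0;
}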
If you add more records, + * this needs to be re-checked */ + if (max_length_minus_max_record_size < CNAT_NFV9_MIN_RECORD_SIZE) { + printf( + "Resetting max_length_minus_max_record_size from %d to %ld\n", + max_length_minus_max_record_size, + CNAT_NFV9_MIN_RECORD_SIZE); + + max_length_minus_max_record_size = CNAT_NFV9_MIN_RECORD_SIZE; + } + return max_length_minus_max_record_size; +} + +/* This function finds if the netflow server indicated by + * new_server_info is already configured for some other instance + * if yes, it returns the same pointer so that, info sent to the + * server is consistent. If the server is not found, a new instance + * is created and returned. If an existing server is used, its refernce + * count is incrimented (indicating the number of instances using the + * same server + */ + /* #define DEBUG_NF_SERVER_CONFIG 1 */ +static u16 nfv9_get_server_instance( + cnat_nfv9_logging_info_t *nfv9_info, nfv9_server_info_t *new_server_info) +{ + + /* Check if the instance has a server already and if yes, does it match */ + nfv9_server_info_t *server; + if(nfv9_info->server_index != EMPTY) { + server = nfv9_server_info_pool + nfv9_info->server_index; + + if((server->ipv4_address == new_server_info->ipv4_address) && + (server->port == new_server_info->port)) { + /* Same server.. just check if refresh rate/timeouts are reduced */ +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + printf("\n Server match for %x and port %d\n", + new_server_info->ipv4_address, new_server_info->port); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + goto adjust_refresh_rate; + } else { /* The server is being changed */ + server->ref_count--; +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + printf("\n Server change from %x, %d to %x, %d" + "Ref count %d\n", + server->ipv4_address, + server->port, + new_server_info->ipv4_address, new_server_info->port, + server->ref_count); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + if(!server->ref_count) { + /* Return this server to pool */ +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + PLATFORM_DEBUG_PRINT("Deleting Server %x, %d at %d\n", + server->ipv4_address, + server->port, + nfv9_info->server_index); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + pool_put(nfv9_server_info_pool, server); + } + } + } + + /* Now check if the server is already present in the pool */ + u8 found = 0; + server = 0; + pool_foreach (server, nfv9_server_info_pool, ({ + if ((server->ipv4_address == new_server_info->ipv4_address) && + (server->port == new_server_info->port)) { + server->ref_count++; + nfv9_info->server_index = server - nfv9_server_info_pool; + found = 1; +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + printf("Re-using server %x, %d Ref count %d\n", + server->ipv4_address, server->port, server->ref_count); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + break; + } + })); + + if(!found) { + /* Create a new one, initialize and return */ + server = 0; + pool_get(nfv9_server_info_pool, server); + memcpy(server, new_server_info, sizeof(nfv9_server_info_t)); + server->ref_count = 1; + nfv9_info->server_index = server - nfv9_server_info_pool; +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + printf("Create new server for at %d %x and port %d\n", + nfv9_info->server_index, + new_server_info->ipv4_address, new_server_info->port); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + return CNAT_SUCCESS; + } + +adjust_refresh_rate: + if(server->refresh_rate > + new_server_info->refresh_rate) { 
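/*
 * Illustrative sketch (not part of the original diff):
 * nfv9_get_server_instance() above keeps one nfv9_server_info_t per distinct
 * (collector address, port) pair and shares it between logging instances via
 * a reference count.  The stand-alone toy below shows the same
 * acquire/release idea with a fixed-size array instead of a vppinfra pool;
 * all names here are hypothetical.
 */
#include <stdint.h>

#define TOY_MAX_SERVERS 8

typedef struct {
  uint32_t ipv4_address;        /* key part 1 */
  uint16_t port;                /* key part 2 */
  uint32_t ref_count;           /* 0 means the slot is free */
} toy_server_t;

static toy_server_t toy_servers[TOY_MAX_SERVERS];

/* Return an index for (addr, port), reusing an existing entry when found. */
static int
toy_server_acquire (uint32_t addr, uint16_t port)
{
  int free_slot = -1;
  for (int i = 0; i < TOY_MAX_SERVERS; i++)
    {
      if (toy_servers[i].ref_count &&
          toy_servers[i].ipv4_address == addr && toy_servers[i].port == port)
        {
          toy_servers[i].ref_count++;   /* share the existing collector */
          return i;
        }
      if (!toy_servers[i].ref_count && free_slot < 0)
        free_slot = i;
    }
  if (free_slot < 0)
    return -1;                          /* pool exhausted */
  toy_servers[free_slot].ipv4_address = addr;
  toy_servers[free_slot].port = port;
  toy_servers[free_slot].ref_count = 1;
  return free_slot;
}

/* Drop one reference; the slot becomes reusable when the count hits zero. */
static void
toy_server_release (int index)
{
  if (index >= 0 && toy_servers[index].ref_count)
    toy_servers[index].ref_count--;
}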
+ server->refresh_rate = + new_server_info->refresh_rate; +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + printf("Reset refresh rate to %d\n", + server->refresh_rate); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + } + + if(server->timeout_rate > + new_server_info->timeout_rate) { + server->timeout_rate = + new_server_info->timeout_rate; +#ifdef DEBUG_NF_SERVER_CONFIG + if(my_instance_number == 1) { + printf("Reset timeout rate to %d\n", + server->timeout_rate); + } +#endif /* #ifdef DEBUG_NF_SERVER_CONFIG */ + } + + return CNAT_SUCCESS; +} +static clib_error_t * +set_vcgn_nfv9_logging_cofig_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vcgn_classify_main_t * vcm = &vcgn_classify_main; + spp_api_cnat_v4_config_nfv9_logging_t nfv9_conf; + ip4_address_t server_addr; + u32 ip_addr = 0; + u32 port; + u32 refresh_rate = 0; + u32 timeout = 0; + u32 pmtu = 0; + u8 enable = 1; +/* vcgn changes start*/ + cnat_nfv9_logging_info_t *my_nfv9_logging_info = NULL; + cnat_nfv9_logging_info_t *my_nfv9_logging_info_tmp = NULL; + cnat_vrfmap_t *my_vrfmap = 0, *my_vrfmap_temp; + u16 i_vrf; + u32 i_vrf_id; + u8 found; + u8 found_vrf; + /* + * Init NFv9 logging info as needed, this will be done only once + */ + cnat_nfv9_logging_init(); + + i_vrf = vcm->inside_sw_if_index; + i_vrf_id = vcm->inside_sw_if_index; + + found = 0; + +/* vcgn changes end*/ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "server %U", unformat_ip4_address, &server_addr)) + ip_addr = clib_net_to_host_u32(server_addr.as_u32); + else if (unformat(input, "port %u", &port)) + ; + else if (unformat(input, "refresh-rate %u", &refresh_rate)) + ; + else if (unformat(input, "timeout %u", &timeout)) + ; + else if (unformat(input, "pmtu %u", &pmtu)) + ; + else if (unformat(input, "del")) + enable = 0; + else break; + } + + + #if 0 + vlib_cli_output(vm, "ip 0x%x, port %u, refresh %u, " + "timeout %u, pmtu %u enable %u\n", + ip_addr, port, refresh_rate, + timeout, pmtu, enable); + #endif + nfv9_conf.enable = enable; + nfv9_conf.ipv4_address = ip_addr; + nfv9_conf.i_vrf_id = vcm->inside_sw_if_index; + nfv9_conf.i_vrf = vcm->inside_sw_if_index; + nfv9_conf.port = port; + nfv9_conf.refresh_rate = refresh_rate; + nfv9_conf.timeout_rate = timeout; + nfv9_conf.path_mtu = pmtu; + nfv9_conf.nfv9_global_collector = 0; + nfv9_conf.session_logging = 0; + + /* + * At this point the NFv9 global information should already be + * inited as we have called cnat_nfv9_logging_init() + */ + + if (nfv9_conf.nfv9_global_collector) { + if (cnat_nfv9_global_info.cnat_nfv9_global_collector_index != EMPTY) { + found = 1; + my_nfv9_logging_info = cnat_nfv9_logging_info_pool + + cnat_nfv9_global_info.cnat_nfv9_global_collector_index; + } + } else { + /* Do we already have a map for this VRF? 
*/ + pool_foreach (my_nfv9_logging_info, cnat_nfv9_logging_info_pool, ({ + if (my_nfv9_logging_info->i_vrf_id == i_vrf_id) { + found_vrf = 1; + printf("found_vrf %d\n", found_vrf); + nfv9_server_info_t *server = nfv9_server_info_pool + + my_nfv9_logging_info->server_index; + printf("server ip4 0x%x port %d\n", server->ipv4_address, server->port); + printf("nfv9_conf v4 0x%x port %d\n", nfv9_conf.ipv4_address, nfv9_conf.port); + if((server->ipv4_address == (nfv9_conf.ipv4_address)) && (server->port == (nfv9_conf.port))) { + found = 1; + my_nfv9_logging_info_tmp = my_nfv9_logging_info; + printf("found %d\n", found); + break; + } + } + })); + } + + if ((nfv9_conf.ipv4_address == 0) || + (nfv9_conf.port == 0)) { + vlib_cli_output(vm, + "Add NFv9 ivrf %d Logging Invalid values [IPv4 0x%x, PORT %d]\n", + i_vrf, + (nfv9_conf.ipv4_address), + (nfv9_conf.port)); + goto done; + } + + if (nfv9_conf.enable) { + if ((nfv9_conf.ipv4_address == 0) || + (nfv9_conf.port == 0)) { + nfv9_conf.rc = CNAT_ERR_PARSER; + vlib_cli_output(vm, + "NFV9_logging i_vrf %d, Invalid [v4_addr 0x%x port %d]\n", + i_vrf, + (nfv9_conf.ipv4_address), + (nfv9_conf.port)); + goto done; + } + + nfv9_server_info_t new_server_info; + memset(&new_server_info, 0, sizeof(nfv9_server_info_t)); + new_server_info.ipv4_address = + nfv9_conf.ipv4_address; + new_server_info.port = + (nfv9_conf.port); + new_server_info.refresh_rate = + (nfv9_conf.refresh_rate); + /* + * Store the timeout in seconds. User configures it in minutes + */ + new_server_info.timeout_rate = + 60*(nfv9_conf.timeout_rate); + if (found && my_nfv9_logging_info) { + /* + * Entry already present, change it + */ + my_nfv9_logging_info->max_length_minus_max_record_size = + nfv9_get_max_length_minus_max_record_size( + ((nfv9_conf.path_mtu))); + } else { + pool_get(cnat_nfv9_logging_info_pool, my_nfv9_logging_info); + memset(my_nfv9_logging_info, 0, sizeof(*my_nfv9_logging_info)); + my_nfv9_logging_info->server_index = EMPTY; + my_nfv9_logging_info->nfv9_logging_next_index = EMPTY; + /* + * Make the current and head logging context indeices as EMPTY. + * When first logging happens, these get set correctly + */ + my_nfv9_logging_info->current_logging_context = NULL; + my_nfv9_logging_info->queued_logging_context = NULL; +#if 0 + my_nfv9_logging_info->f = NULL; + my_nfv9_logging_info->to_next = NULL; + output_node = vlib_get_node_by_name (vm, (u8 *) "ip4-input"); + my_nfv9_logging_info->ip4_input_node_index = output_node->index; + printf("ip4_input_node_index %d\n", my_nfv9_logging_info->ip4_input_node_index); +#endif + my_nfv9_logging_info->i_vrf = i_vrf; + my_nfv9_logging_info->i_vrf_id = i_vrf_id; + my_nfv9_logging_info->max_length_minus_max_record_size = + nfv9_get_max_length_minus_max_record_size( + nfv9_conf.path_mtu); + + /* my_nfv9_logging_info will have a copy of logging_policy + * because, it is quite possible that nfv9 config arrives before + * the corresponding vrfmap is initialized. 
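/*
 * Illustrative sketch (not part of the original diff): before a collector
 * entry is created, the handler above rejects a zero address or port and
 * converts the user-supplied timeout from minutes to the seconds stored in
 * nfv9_server_info_t (60 * timeout_rate).  A stand-alone version of that
 * normalisation step could look like this; the struct and function names are
 * hypothetical.
 */
#include <stdint.h>

typedef struct {
  uint32_t ipv4_address;
  uint16_t port;
  uint32_t refresh_rate;
  uint32_t timeout_rate_seconds;
} toy_collector_cfg_t;

/* Returns 0 on success, -1 when the address or port is missing. */
static int
toy_collector_cfg_normalise (toy_collector_cfg_t *cfg,
                             uint32_t addr, uint16_t port,
                             uint32_t refresh, uint32_t timeout_minutes)
{
  if (addr == 0 || port == 0)
    return -1;                          /* mirrors the CLI validation above */
  cfg->ipv4_address = addr;
  cfg->port = port;
  cfg->refresh_rate = refresh;
  cfg->timeout_rate_seconds = 60 * timeout_minutes;   /* minutes -> seconds */
  return 0;
}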
In such cases + * this copy will be used to update the vrfmap entry + */ + my_nfv9_logging_info->logging_policy = nfv9_conf.session_logging; + + if (nfv9_conf.nfv9_global_collector) { + cnat_nfv9_global_info.cnat_nfv9_global_collector_index = + my_nfv9_logging_info - cnat_nfv9_logging_info_pool; + + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + if (my_vrfmap->nfv9_logging_index == EMPTY) { + my_vrfmap->nfv9_logging_index = + cnat_nfv9_global_info.cnat_nfv9_global_collector_index; + } + })); + } else { + u32 my_vrfmap_found = 0; + + FIND_MY_VRF_USING_I_VRF_ID + my_vrfmap = my_vrfmap_temp; + if (my_vrfmap_found) { + if(my_vrfmap->nfv9_logging_index == EMPTY) { + my_vrfmap->nfv9_logging_index = + my_nfv9_logging_info - cnat_nfv9_logging_info_pool; + // my_vrfmap->nf_logging_policy = mp->session_logging; + } else { + cnat_nfv9_logging_info_t *my_nfv9_logging_info_temp = cnat_nfv9_logging_info_pool + my_vrfmap->nfv9_logging_index; + while(my_nfv9_logging_info_temp->nfv9_logging_next_index != EMPTY){ + my_nfv9_logging_info_temp = cnat_nfv9_logging_info_pool + my_nfv9_logging_info_temp->nfv9_logging_next_index; + } + my_nfv9_logging_info_temp->nfv9_logging_next_index = my_nfv9_logging_info - cnat_nfv9_logging_info_pool; + } + } + } + } + + /* Update logging policy */ + my_nfv9_logging_info->logging_policy = nfv9_conf.session_logging; + if (nfv9_conf.nfv9_global_collector) { + if(PLATFORM_DBL_SUPPORT) { + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + if (my_vrfmap->nfv9_logging_index == + cnat_nfv9_global_info.cnat_nfv9_global_collector_index) { + my_vrfmap->nf_logging_policy = nfv9_conf.session_logging; + } + })); + } else { + nfv9_conf.rc = CNAT_ERR_NO_SESSION_DB; + } + } else { + if(PLATFORM_DBL_SUPPORT) { + u32 my_vrfmap_found = 0; + my_vrfmap_temp = NULL; + FIND_MY_VRF_USING_I_VRF_ID + my_vrfmap = my_vrfmap_temp; + if (my_vrfmap_found) { + // my_vrfmap->nf_logging_policy = mp->session_logging; + } + } else { + nfv9_conf.rc = CNAT_ERR_NO_SESSION_DB; + } + } + u8 nfv9_logging_policy = 0; + u32 my_vrfmap_found = 0; + my_vrfmap_temp = NULL; + FIND_MY_VRF_USING_I_VRF_ID + my_vrfmap = my_vrfmap_temp; + if (my_vrfmap_found) { + u32 index_curr = my_vrfmap->nfv9_logging_index; + cnat_nfv9_logging_info_t *my_nfv9_logging_info_temp; + while(index_curr != EMPTY) { + my_nfv9_logging_info_temp = cnat_nfv9_logging_info_pool + index_curr; + nfv9_logging_policy = nfv9_logging_policy || my_nfv9_logging_info_temp->logging_policy; + index_curr = (cnat_nfv9_logging_info_pool + index_curr)->nfv9_logging_next_index; + } + my_vrfmap->nf_logging_policy = nfv9_logging_policy; + } + //vlib_cli_output(vm,"Netflow logging policy = %d\n", my_vrfmap->nf_logging_policy); + if(nfv9_get_server_instance(my_nfv9_logging_info, &new_server_info) + != CNAT_SUCCESS) { + vlib_cli_output(vm, "Error to get server instance"); + nfv9_conf.rc = CNAT_ERR_PARSER; + goto done; + } + nfv9_init_pkt_sent_data(my_nfv9_logging_info); + + vlib_cli_output(vm,"Adding NFv9 Logging Succeeded\n"); + nfv9_configured = 1; + + } else { + /*Delete path*/ + if (found) { + /* if found entry then we need to overwrite the my_nfv9_logging_info_tmp + * to my_nfv9_logging_info + */ + my_nfv9_logging_info = my_nfv9_logging_info_tmp; + if (i_vrf == INVALID_UIDX) { + /* + * We are deleting a global collector. 
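/*
 * Illustrative sketch (not part of the original diff): a vrfmap entry points
 * at a chain of NetFlow v9 collectors linked by pool indices
 * (nfv9_logging_index -> nfv9_logging_next_index -> ... -> EMPTY), and the
 * per-VRF nf_logging_policy computed above is simply the logical OR of every
 * collector's logging_policy along that chain.  A stand-alone walk of such a
 * chain, with a plain array standing in for the vppinfra pool, might look
 * like this; the names and the EMPTY sentinel value are assumptions.
 */
#include <stdint.h>

#define TOY_EMPTY ((uint32_t) ~0)

typedef struct {
  uint8_t  logging_policy;
  uint32_t next_index;          /* index of the next collector, or TOY_EMPTY */
} toy_logging_info_t;

static uint8_t
toy_aggregate_logging_policy (const toy_logging_info_t *pool, uint32_t head)
{
  uint8_t policy = 0;
  for (uint32_t i = head; i != TOY_EMPTY; i = pool[i].next_index)
    policy = policy || pool[i].logging_policy;   /* any collector enables it */
  return policy;
}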
Mark the collectors + * in those VRFs using the global collector + */ + pool_foreach (my_vrfmap, cnat_map_by_vrf, ({ + if (my_vrfmap->nfv9_logging_index == + cnat_nfv9_global_info.cnat_nfv9_global_collector_index) { + my_vrfmap->nfv9_logging_index = EMPTY; + } + })); + + cnat_nfv9_global_info.cnat_nfv9_global_collector_index = EMPTY; + } else { + u32 my_vrfmap_found = 0; + my_vrfmap_temp = NULL; + FIND_MY_VRF_USING_I_VRF_ID + my_vrfmap = my_vrfmap_temp; + if (my_vrfmap_found) { + // my_vrfmap->nfv9_logging_index = cnat_nfv9_global_info.cnat_nfv9_global_collector_index; + } + } + if (my_nfv9_logging_info->queued_logging_context || + my_nfv9_logging_info->current_logging_context) { + /* + * If there is a pending context: + * Set the deleted flag to 1. This will ensure + * that the logging info structure gets freed after any + * pending packet get sent + */ + my_nfv9_logging_info->deleted = 1; + } else { + /* + * No pending context, just free the logging info structure + */ + u32 index = my_nfv9_logging_info - cnat_nfv9_logging_info_pool; + if(index == my_vrfmap->nfv9_logging_index) { + /* Deleting the first sever */ + my_vrfmap->nfv9_logging_index = my_nfv9_logging_info->nfv9_logging_next_index; + /* if(my_nfv9_logging_info->nfv9_logging_next_index != EMPTY){ + my_vrfmap->nf_logging_policy = (cnat_nfv9_logging_info_pool + my_nfv9_logging_info->nfv9_logging_next_index)->logging_policy; + } else { + my_vrfmap->nf_logging_policy = EMPTY; + }*/ + } else { + u32 index_curr = my_vrfmap->nfv9_logging_index; + u32 index_prev = EMPTY; + while(index_curr != EMPTY) { + index_prev = index_curr; + index_curr = (cnat_nfv9_logging_info_pool + index_curr)->nfv9_logging_next_index; + if(index == index_curr) + { + (cnat_nfv9_logging_info_pool + index_prev)->nfv9_logging_next_index = (cnat_nfv9_logging_info_pool + index_curr)->nfv9_logging_next_index; + break; + } + } + } + nfv9_delete_server_info(my_nfv9_logging_info); + pool_put(cnat_nfv9_logging_info_pool, my_nfv9_logging_info); + } + + vlib_cli_output(vm, "Deleting NFv9 Logging Succeeded\n"); + /* + * Search across all vrf and check if nfv9 logging is configured. 
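/*
 * Illustrative sketch (not part of the original diff): the delete path above
 * unlinks one collector from the per-VRF chain of pool indices.  If the
 * entry is the chain head, the vrfmap's head index is advanced; otherwise
 * the predecessor's next_index is spliced over it.  A stand-alone version of
 * that unlink, with an array instead of the vppinfra pool, could look like
 * this; the names and the EMPTY sentinel are assumptions.
 */
#include <stdint.h>

#define TOY_EMPTY ((uint32_t) ~0)

typedef struct {
  uint32_t next_index;
} toy_chain_elt_t;

/* Remove 'victim' from the chain whose head index is *head. */
static void
toy_chain_unlink (toy_chain_elt_t *pool, uint32_t *head, uint32_t victim)
{
  if (*head == victim)
    {
      *head = pool[victim].next_index;          /* deleting the first entry */
      return;
    }
  for (uint32_t prev = *head; prev != TOY_EMPTY; prev = pool[prev].next_index)
    if (pool[prev].next_index == victim)
      {
        pool[prev].next_index = pool[victim].next_index;  /* splice it out */
        return;
      }
}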
+ */ + nfv9_configured = 0; + pool_foreach (my_nfv9_logging_info, cnat_nfv9_logging_info_pool, ({ + nfv9_configured = 1; + break; + })); + } else { + nfv9_conf.rc = CNAT_NO_CONFIG; + vlib_cli_output(vm, "Add NFv9 Logging Failed (2) Non Existent vrf %d\n", + i_vrf); + + } + u8 nfv9_logging_policy = 0; + u32 my_vrfmap_found = 0; + my_vrfmap_temp = NULL; + FIND_MY_VRF_USING_I_VRF_ID + my_vrfmap = my_vrfmap_temp; + if (my_vrfmap_found) { + u32 index_curr = my_vrfmap->nfv9_logging_index; + cnat_nfv9_logging_info_t *my_nfv9_logging_info_temp; + while(index_curr != EMPTY) { + my_nfv9_logging_info_temp = cnat_nfv9_logging_info_pool + index_curr; + nfv9_logging_policy = nfv9_logging_policy || my_nfv9_logging_info_temp->logging_policy; + index_curr = (cnat_nfv9_logging_info_pool + index_curr)->nfv9_logging_next_index; + } + my_vrfmap->nf_logging_policy = nfv9_logging_policy; + } + printf("After deleting the netflow server,Netflow logging policy = %d\n", my_vrfmap->nf_logging_policy); + } + +done: + return 0; +} + +/* config CLIs */ +VLIB_CLI_COMMAND (set_vcgn_map_command) = { + .path = "set vcgn map", + .short_help = "set vcgn map <lo-address> [- <hi-address>]", + .function = set_vcgn_map_command_fn, +}; + +VLIB_CLI_COMMAND (set_vcgn_inside_command) = { + .path = "set vcgn inside", + .short_help = "set vcgn inside <inside intfc> outside <outside intfc>", + .function = set_vcgn_inside_command_fn, +}; + +VLIB_CLI_COMMAND (set_vcgn_tcp_timeout_command) = { + .path = "set vcgn tcp timeout", + .short_help = "set vcgn tcp timeout active <1-65535> init <1-65535>", + .function = set_vcgn_tcp_timeout_command_fn, +}; + +VLIB_CLI_COMMAND (set_vcgn_udp_timeout_command) = { + .path = "set vcgn udp timeout", + .short_help = "set vcgn udp timeout active <1-65535> init <1-65535>", + .function = set_vcgn_udp_timeout_command_fn, +}; + +VLIB_CLI_COMMAND (set_vcgn_icmp_timeout_command) = { + .path = "set vcgn icmp timeout", + .short_help = "set vcgn icmp timeout <1-65535>", + .function = set_vcgn_icmp_timeout_command_fn, +}; + +VLIB_CLI_COMMAND (set_vcgn_protocol_default_timeout_command) = { + .path = "set vcgn default timeout", + .short_help = "set vcgn default timeout protocol <tcp/udp/icmp>", + .function = set_vcgn_protocol_default_timeout_command_fn, +}; + +VLIB_CLI_COMMAND (set_vcgn_dynamic_port_start_range_command) = { + .path = "set vcgn dynamic port start", + .short_help = "set vcgn dynamic port start <1-65535>", + .function = set_vcgn_dynamic_port_start_range_command_fn, +}; + +VLIB_CLI_COMMAND (set_vcgn_port_limit_command) = { + .path = "set vcgn port limit", + .short_help = "set vcgn port limit <1-65535>", + .function = set_vcgn_port_limit_command_fn, +}; + +VLIB_CLI_COMMAND (set_vcgn_nfv9_logging_cofig_command) = { + .path = "set vcgn nfv9", + .short_help = "set vcgn nfv9 [del] server <ip-addr> port <port> [refresh-rate <n>] [timeout <n>] [pmtu <n>]", + .function = set_vcgn_nfv9_logging_cofig_command_fn, +}; + + +/* show CLIs */ +VLIB_CLI_COMMAND (show_vcgn_config_command) = { + .path = "show vcgn config", + .short_help = "show vcgn config", + .function = show_vcgn_config_command_fn, +}; + +VLIB_CLI_COMMAND (show_vcgn_stat_command) = { + .path = "show vcgn statistics", + .short_help = "show vcgn statistics", + .function = show_vcgn_stats_command_fn, +}; + +VLIB_CLI_COMMAND (show_vcgn_inside_translation_command) = { + .path = "show vcgn inside-translation", + .short_help = "show vcgn inside-translation protocol <tcp/udp/icmp> " + "inside-addr <ip-addr> [start-port <n>] [end-port <n>]", + .function = 
show_vcgn_inside_translation_command_fn, +}; + +VLIB_CLI_COMMAND (show_vcgn_outside_translation_command) = { + .path = "show vcgn outside-translation", + .short_help = "show vcgn outside-translation protocol <tcp/udp/icmp> " + "outside-addr <ip-addr> [start-port <n>] [end-port <n>]", + .function = show_vcgn_outside_translation_command_fn, +}; + +static clib_error_t * +vcgn_init (vlib_main_t * vm) +{ + clib_error_t * error = 0; + + if ((error = vlib_call_init_function + (vm, vcgn_classify_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_udp_inside_input_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_udp_outside_input_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_udp_inside_input_exc_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_db_scanner_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_tcp_inside_input_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_tcp_inside_input_exc_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_tcp_outside_input_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_icmp_q_inside_input_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_icmp_q_inside_input_exc_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_icmp_q_outside_input_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_icmp_e_inside_input_init))) + return error; + if ((error = vlib_call_init_function + (vm, cnat_ipv4_icmp_e_outside_input_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (vcgn_init); diff --git a/vnet/vnet/vcgn/vcgn_db.h b/vnet/vnet/vcgn/vcgn_db.h new file mode 100644 index 00000000000..cd7d835cba1 --- /dev/null +++ b/vnet/vnet/vcgn/vcgn_db.h @@ -0,0 +1,117 @@ +/* + *------------------------------------------------------------------ + * vcgn_db.h - translation database definitions + * + * Copyright (c) 2007-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __VCGN_DB_H__ +#define __VCGN_DB_H__ + +#include "index_list.h" + +/* + * The key structure. All fields are in NETWORK byte order! + */ +typedef struct { + u32 ipv4; + u16 port; + u16 vrf; //bit0-12:vrf, bit13:unused, bit14-15:protocol +} cnat_db_key_t; + +/* bit14-15:protocol in cnat_db_key_t */ +#define CNAT_INVALID_PROTO 0x0000 +#define CNAT_UDP 0x4000 +#define CNAT_TCP 0x8000 +#define CNAT_ICMP 0xc000 +#define CNAT_VRF_MASK 0x3fff +#define CNAT_PRO_MASK 0xc000 +#define CNAT_PRO_SHIFT 14 + +/* + * Maximum number of VRF entries supported + */ +#define CNAT_MAX_VRFMAP_ENTRIES (CNAT_VRF_MASK + 1) +/* + * for hashing purposes, fetch the key in one instr. + */ +typedef union { + cnat_db_key_t k; + u64 key64; +} cnat_key_t; + +/* + * Main translation database entries. 
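/*
 * Illustrative sketch (not part of the original diff): in cnat_db_key_t the
 * 16-bit 'vrf' field carries both the VRF (bits 0-13, CNAT_VRF_MASK 0x3fff)
 * and the protocol (bits 14-15: CNAT_UDP 0x4000, CNAT_TCP 0x8000,
 * CNAT_ICMP 0xc000), while cnat_key_t overlays the whole key on a u64 so it
 * can be fetched for hashing in a single load.  The helpers below merely
 * restate that packing; they are not part of the header.
 */
#include <stdint.h>

#define TOY_CNAT_VRF_MASK  0x3fff
#define TOY_CNAT_PRO_MASK  0xc000
#define TOY_CNAT_PRO_SHIFT 14

static inline uint16_t
toy_cnat_key_pack (uint16_t vrf, uint16_t proto_bits)  /* e.g. 0x4000 = UDP */
{
  return (uint16_t) ((vrf & TOY_CNAT_VRF_MASK) |
                     (proto_bits & TOY_CNAT_PRO_MASK));
}

static inline uint16_t
toy_cnat_key_vrf (uint16_t packed)
{
  return packed & TOY_CNAT_VRF_MASK;
}

static inline uint16_t
toy_cnat_key_proto (uint16_t packed)    /* yields 1, 2 or 3 for the constants above */
{
  return (packed & TOY_CNAT_PRO_MASK) >> TOY_CNAT_PRO_SHIFT;
}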
Currently 0x50 = 80 bytes in length. + * Given 20,000,000 entries, it saves nearly 1gb of SDRAM to pack the entries + * and pay the extra prefetch. So, that's what we do. + */ + +typedef struct { + /* 0x00 */ + index_slist_t out2in_hash; /* hash-and-chain, x2 */ + index_slist_t in2out_hash; + + /* 0x08 */ + cnat_key_t out2in_key; /* network-to-user, outside-to-inside key */ + + /* 0x10 */ + cnat_key_t in2out_key; /* user-to-network, inside-to-outside key */ + + /* 0x18 */ + index_dlist_t user_ports; /* per-user translation list */ + + /* 0x20 */ + u32 user_index; /* index of user that owns this entry */ + + /* 0x24 */ + u16 vrfmap_index; /* index of vrfmap */ + + /* 0x26 */ + u16 flags; /* Always need flags... */ +#define CNAT_DB_FLAG_PORT_PAIR (1<<0) +#define CNAT_DB_FLAG_TCP_ACTIVE (1<<1) +#define CNAT_DB_FLAG_ENTRY_FREE (1<<2) +#define CNAT_DB_FLAG_UDP_ACTIVE (1<<3) +#define CNAT_DB_FLAG_STATIC_PORT (1<<4) +#define CNAT_DB_FLAG_ALG_ENTRY (1<<5) + + /* 0x28 */ + u32 dst_ipv4; /* pointer to ipv4 dst list, used in evil mode */ + + /* 0x2C */ + u32 out2in_pkts; /* pkt counters */ + + /* 0x30 */ + u32 in2out_pkts; + + /* 0x34 */ + u32 entry_expires; /* timestamp used to expire translations */ + + /* 0x38 */ + union { /* used by FTP ALG, pkt len delta due to FTP PORT cmd */ + u16 delta; + i8 alg_dlt[2]; /* two delta values, 0 for previous, 1 for current */ + u16 il; /* Used to indicate if interleaved mode is used + in case of RTSP ALG */ + } alg; + + /* 0x 48 */ + u32 tcp_seq_num; /* last tcp (FTP) seq # that has pkt len change due to PORT */ + + cnat_timeout_t destn_key; + + /* 0x4C... last byte -- 72 total */ +} cnat_main_db_entry_t; +#endif diff --git a/vnet/vnet/vnet.h b/vnet/vnet/vnet.h new file mode 100644 index 00000000000..2378c2420b8 --- /dev/null +++ b/vnet/vnet/vnet.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * vnet.h: general networking definitions + * + * Copyright (c) 2011 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_vnet_h +#define included_vnet_vnet_h + +#include <vppinfra/types.h> + +typedef enum { + VNET_UNICAST, + VNET_MULTICAST, + VNET_N_CAST, +} vnet_cast_t; + +#include <vnet/unix/pcap.h> +#include <vnet/buffer.h> +#include <vnet/config.h> +#include <vnet/interface.h> +#include <vnet/rewrite.h> +#include <vnet/api_errno.h> + +typedef struct vnet_main_t { + u32 local_interface_hw_if_index; + u32 local_interface_sw_if_index; + + vnet_interface_main_t interface_main; + + /* set up by constructors */ + vnet_device_class_t * device_class_registrations; + vnet_hw_interface_class_t * hw_interface_class_registrations; + _vnet_interface_function_list_elt_t * hw_interface_add_del_functions; + _vnet_interface_function_list_elt_t * hw_interface_link_up_down_functions; + _vnet_interface_function_list_elt_t * sw_interface_add_del_functions; + _vnet_interface_function_list_elt_t * sw_interface_admin_up_down_functions; + + /* + * Last "api" error, preserved so we can issue reasonable diagnostics + * at or near the top of the food chain + */ + vnet_api_error_t api_errno; + + vlib_main_t * vlib_main; +} vnet_main_t; + +vnet_main_t vnet_main; +vnet_main_t **vnet_mains; + +#include <vnet/interface_funcs.h> +#include <vnet/global_funcs.h> + +#if DPDK > 0 +#include <vnet/devices/dpdk/threads.h> +#include <vnet/dpdk_replication.h> +#endif + +#endif /* included_vnet_vnet_h */ diff --git a/vnet/vnet/vxlan/decap.c b/vnet/vnet/vxlan/decap.c new file mode 100644 index 00000000000..7789bed9310 --- /dev/null +++ b/vnet/vnet/vxlan/decap.c @@ -0,0 +1,429 @@ +/* + * decap.c: vxlan tunnel decap packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/vxlan/vxlan.h> + +vlib_node_registration_t vxlan_input_node; + +typedef struct { + u32 next_index; + u32 tunnel_index; + u32 error; + u32 vni; +} vxlan_rx_trace_t; + +static u8 * format_vxlan_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + vxlan_rx_trace_t * t = va_arg (*args, vxlan_rx_trace_t *); + + if (t->tunnel_index != ~0) + { + s = format (s, "VXLAN: tunnel %d vni %d next %d error %d", + t->tunnel_index, t->vni, t->next_index, t->error); + } + else + { + s = format (s, "VXLAN: no tunnel for vni %d next %d error %d", + t->vni, t->next_index, t->error); + } + return s; +} + +static uword +vxlan_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + vxlan_main_t * vxm = &vxlan_main; + vnet_main_t * vnm = vxm->vnet_main; + vnet_interface_main_t * im = &vnm->interface_main; + u32 last_tunnel_index = ~0; + vxlan_tunnel_key_t last_key; + u32 pkts_decapsulated = 0; + u32 cpu_index = os_get_cpu_number(); + u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; + + last_key.as_u64 = ~0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + stats_sw_if_index = node->runtime_data[0]; + stats_n_packets = stats_n_bytes = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + ip4_header_t * ip0, * ip1; + vxlan_header_t * vxlan0, * vxlan1; + uword * p0, * p1; + u32 tunnel_index0, tunnel_index1; + vxlan_tunnel_t * t0, * t1; + vxlan_tunnel_key_t key0, key1; + u32 error0, error1; + u32 sw_if_index0, sw_if_index1, len0, len1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* udp leaves current_data pointing at the vxlan header */ + vxlan0 = vlib_buffer_get_current (b0); + vxlan1 = vlib_buffer_get_current (b1); + + vlib_buffer_advance + (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + vlib_buffer_advance + (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* pop (ip, udp, vxlan) */ + vlib_buffer_advance + (b0, sizeof(*ip0)+sizeof(udp_header_t)+sizeof(*vxlan0)); + vlib_buffer_advance + (b1, sizeof(*ip1)+sizeof(udp_header_t)+sizeof(*vxlan1)); + + tunnel_index0 = ~0; + error0 = 0; + + tunnel_index1 = ~0; + error1 = 0; + + key0.src = ip0->src_address.as_u32; + key0.vni = vxlan0->vni_reserved; + + if (PREDICT_FALSE (key0.as_u64 != last_key.as_u64)) + { + p0 = hash_get (vxm->vxlan_tunnel_by_key, key0.as_u64); + + if (p0 == 0) + { + error0 = VXLAN_ERROR_NO_SUCH_TUNNEL; + next0 = VXLAN_INPUT_NEXT_DROP; + goto trace0; + } + + last_key.as_u64 = key0.as_u64; + tunnel_index0 = last_tunnel_index = p0[0]; + } + else + tunnel_index0 = last_tunnel_index; + + t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0); + + next0 = t0->decap_next_index; + sw_if_index0 = t0->sw_if_index; + len0 = vlib_buffer_length_in_chain (vm, b0); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b0); + + /* Set input sw_if_index to VXLAN tunnel for learning */ + vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0; + + pkts_decapsulated ++; + stats_n_packets += 1; + stats_n_bytes += len0; + + /* Batch stats increment on the same vxlan tunnel so counter + is not incremented per packet */ + if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index)) + { + stats_n_packets -= 1; + stats_n_bytes -= len0; + if (stats_n_packets) + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + cpu_index, stats_sw_if_index, + stats_n_packets, stats_n_bytes); + stats_n_packets = 1; + stats_n_bytes = len0; + stats_sw_if_index = sw_if_index0; + } + + trace0: + b0->error = error0 ? 
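/*
 * Illustrative sketch (not part of the original diff): vxlan-input remembers
 * the last (source address, VNI) key and its tunnel index
 * (last_key / last_tunnel_index above), so back-to-back packets from the
 * same tunnel skip the hash lookup.  Stripped of the vlib machinery, the
 * caching pattern looks like the stand-alone toy below; toy_lookup_tunnel()
 * is a hypothetical stand-in for the hash_get() on vxlan_tunnel_by_key.
 */
#include <stdint.h>

#define TOY_N_TUNNELS 4
static uint64_t toy_tunnel_keys[TOY_N_TUNNELS]; /* filled in by control plane */

/* Slow path: linear search standing in for the hash table lookup. */
static int
toy_lookup_tunnel (uint64_t key64)
{
  for (int i = 0; i < TOY_N_TUNNELS; i++)
    if (toy_tunnel_keys[i] == key64)
      return i;
  return -1;
}

static uint64_t toy_last_key64 = ~0ULL;
static int toy_last_tunnel_index = -1;

/* Fast path: reuse the previous result while the key stays the same. */
static int
toy_cached_lookup (uint64_t key64)
{
  if (key64 != toy_last_key64)
    {
      int idx = toy_lookup_tunnel (key64);
      if (idx < 0)
        return -1;                      /* no such tunnel: caller drops */
      toy_last_key64 = key64;
      toy_last_tunnel_index = idx;
    }
  return toy_last_tunnel_index;
}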
node->errors[error0] : 0; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_rx_trace_t *tr + = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = tunnel_index0; + tr->vni = vnet_get_vni (vxlan0); + } + + key1.src = ip1->src_address.as_u32; + key1.vni = vxlan1->vni_reserved; + + if (PREDICT_FALSE (key1.as_u64 != last_key.as_u64)) + { + p1 = hash_get (vxm->vxlan_tunnel_by_key, key1.as_u64); + + if (p1 == 0) + { + error1 = VXLAN_ERROR_NO_SUCH_TUNNEL; + next1 = VXLAN_INPUT_NEXT_DROP; + goto trace1; + } + + last_key.as_u64 = key1.as_u64; + tunnel_index1 = last_tunnel_index = p1[0]; + } + else + tunnel_index1 = last_tunnel_index; + + t1 = pool_elt_at_index (vxm->tunnels, tunnel_index1); + + next1 = t1->decap_next_index; + sw_if_index1 = t1->sw_if_index; + len1 = vlib_buffer_length_in_chain (vm, b1); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b1); + + /* Set input sw_if_index to VXLAN tunnel for learning */ + vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1; + + pkts_decapsulated ++; + stats_n_packets += 1; + stats_n_bytes += len1; + + /* Batch stats increment on the same vxlan tunnel so counter + is not incremented per packet */ + if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index)) + { + stats_n_packets -= 1; + stats_n_bytes -= len1; + if (stats_n_packets) + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + cpu_index, stats_sw_if_index, + stats_n_packets, stats_n_bytes); + stats_n_packets = 1; + stats_n_bytes = len1; + stats_sw_if_index = sw_if_index1; + } + + trace1: + b1->error = error1 ? node->errors[error1] : 0; + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_rx_trace_t *tr + = vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->next_index = next1; + tr->error = error1; + tr->tunnel_index = tunnel_index1; + tr->vni = vnet_get_vni (vxlan1); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + ip4_header_t * ip0; + vxlan_header_t * vxlan0; + uword * p0; + u32 tunnel_index0; + vxlan_tunnel_t * t0; + vxlan_tunnel_key_t key0; + u32 error0; + u32 sw_if_index0, len0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* udp leaves current_data pointing at the vxlan header */ + vxlan0 = vlib_buffer_get_current (b0); + + vlib_buffer_advance + (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + ip0 = vlib_buffer_get_current (b0); + + /* pop (ip, udp, vxlan) */ + vlib_buffer_advance + (b0, sizeof(*ip0)+sizeof(udp_header_t)+sizeof(*vxlan0)); + + tunnel_index0 = ~0; + error0 = 0; + + key0.src = ip0->src_address.as_u32; + key0.vni = vxlan0->vni_reserved; + + if (PREDICT_FALSE (key0.as_u64 != last_key.as_u64)) + { + p0 = hash_get (vxm->vxlan_tunnel_by_key, key0.as_u64); + + if (p0 == 0) + { + error0 = VXLAN_ERROR_NO_SUCH_TUNNEL; + next0 = VXLAN_INPUT_NEXT_DROP; + goto trace00; + } + + last_key.as_u64 = key0.as_u64; + tunnel_index0 = last_tunnel_index = p0[0]; + } + else + tunnel_index0 = last_tunnel_index; + + t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0); + + next0 = t0->decap_next_index; + sw_if_index0 = t0->sw_if_index; + len0 = vlib_buffer_length_in_chain (vm, b0); + + /* Required to make the l2 tag push / pop code work 
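/*
 * Illustrative sketch (not part of the original diff): instead of bumping
 * the combined RX counter once per packet, the loops above accumulate
 * stats_n_packets / stats_n_bytes for the current stats_sw_if_index and only
 * flush when a packet arrives on a different tunnel interface, plus one
 * final flush after the loop.  The stand-alone skeleton below shows that
 * batching idea; toy_counter_add() is a hypothetical stand-in for
 * vlib_increment_combined_counter().
 */
#include <stdint.h>
#include <stddef.h>

static void
toy_counter_add (uint32_t sw_if_index, uint32_t pkts, uint64_t bytes)
{
  /* in VPP this would be vlib_increment_combined_counter() */
  (void) sw_if_index; (void) pkts; (void) bytes;
}

static void
toy_batch_rx_stats (const uint32_t *sw_if_index, const uint32_t *len, size_t n)
{
  uint32_t stats_sw_if_index = ~0u, stats_pkts = 0;
  uint64_t stats_bytes = 0;

  for (size_t i = 0; i < n; i++)
    {
      if (sw_if_index[i] != stats_sw_if_index)
        {
          if (stats_pkts)                      /* flush the previous batch */
            toy_counter_add (stats_sw_if_index, stats_pkts, stats_bytes);
          stats_sw_if_index = sw_if_index[i];
          stats_pkts = 0;
          stats_bytes = 0;
        }
      stats_pkts += 1;
      stats_bytes += len[i];
    }
  if (stats_pkts)                              /* final flush */
    toy_counter_add (stats_sw_if_index, stats_pkts, stats_bytes);
}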
on l2 subifs */ + vnet_update_l2_len (b0); + + /* Set input sw_if_index to VXLAN tunnel for learning */ + vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0; + + pkts_decapsulated ++; + stats_n_packets += 1; + stats_n_bytes += len0; + + /* Batch stats increment on the same vxlan tunnel so counter + is not incremented per packet */ + if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index)) + { + stats_n_packets -= 1; + stats_n_bytes -= len0; + if (stats_n_packets) + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + cpu_index, stats_sw_if_index, + stats_n_packets, stats_n_bytes); + stats_n_packets = 1; + stats_n_bytes = len0; + stats_sw_if_index = sw_if_index0; + } + + trace00: + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_rx_trace_t *tr + = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = tunnel_index0; + tr->vni = vnet_get_vni (vxlan0); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + /* Do we still need this now that tunnel tx stats is kept? */ + vlib_node_increment_counter (vm, vxlan_input_node.index, + VXLAN_ERROR_DECAPSULATED, + pkts_decapsulated); + + /* Increment any remaining batch stats */ + if (stats_n_packets) + { + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + node->runtime_data[0] = stats_sw_if_index; + } + + return from_frame->n_vectors; +} + +static char * vxlan_error_strings[] = { +#define vxlan_error(n,s) s, +#include <vnet/vxlan/vxlan_error.def> +#undef vxlan_error +#undef _ +}; + +VLIB_REGISTER_NODE (vxlan_input_node) = { + .function = vxlan_input, + .name = "vxlan-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = VXLAN_N_ERROR, + .error_strings = vxlan_error_strings, + + .n_next_nodes = VXLAN_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n, + foreach_vxlan_input_next +#undef _ + }, + +//temp .format_buffer = format_vxlan_header, + .format_trace = format_vxlan_rx_trace, + // $$$$ .unformat_buffer = unformat_vxlan_header, +}; diff --git a/vnet/vnet/vxlan/encap.c b/vnet/vnet/vxlan/encap.c new file mode 100644 index 00000000000..4b475f83886 --- /dev/null +++ b/vnet/vnet/vxlan/encap.c @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
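/*
 * Illustrative sketch (not part of the original diff): the decap node's
 * vxlan_error_strings[] above is generated from vnet/vxlan/vxlan_error.def
 * with an X-macro, and the encap node below does the same with a
 * foreach_vxlan_encap_error list, so the error enum and the human-readable
 * strings cannot drift apart.  The self-contained example below shows the
 * pattern with a hypothetical two-entry error list.
 */
#include <stdio.h>

#define foreach_toy_error                        \
  _(GOOD, "good packets processed")              \
  _(NO_SUCH_TUNNEL, "no such tunnel")

typedef enum {
#define _(sym, str) TOY_ERROR_##sym,
  foreach_toy_error
#undef _
  TOY_N_ERROR,
} toy_error_t;

static const char *toy_error_strings[] = {
#define _(sym, str) str,
  foreach_toy_error
#undef _
};

int
main (void)
{
  for (int i = 0; i < TOY_N_ERROR; i++)
    printf ("%d: %s\n", i, toy_error_strings[i]);
  return 0;
}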
+ */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/vxlan/vxlan.h> + +/* Statistics (not all errors) */ +#define foreach_vxlan_encap_error \ +_(ENCAPSULATED, "good packets encapsulated") \ +_(DEL_TUNNEL, "deleted tunnel packets") + +static char * vxlan_encap_error_strings[] = { +#define _(sym,string) string, + foreach_vxlan_encap_error +#undef _ +}; + +typedef enum { +#define _(sym,str) VXLAN_ENCAP_ERROR_##sym, + foreach_vxlan_encap_error +#undef _ + VXLAN_ENCAP_N_ERROR, +} vxlan_encap_error_t; + +typedef enum { + VXLAN_ENCAP_NEXT_IP4_LOOKUP, + VXLAN_ENCAP_NEXT_DROP, + VXLAN_ENCAP_N_NEXT, +} vxlan_encap_next_t; + +typedef struct { + u32 tunnel_index; + u32 vni; +} vxlan_encap_trace_t; + +u8 * format_vxlan_encap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + vxlan_encap_trace_t * t + = va_arg (*args, vxlan_encap_trace_t *); + + s = format (s, "VXLAN-ENCAP: tunnel %d vni %d", t->tunnel_index, t->vni); + return s; +} + +#define foreach_fixed_header_offset \ + _(0) _(1) _(2) _(3) + +static uword +vxlan_encap (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + vxlan_main_t * vxm = &vxlan_main; + vnet_main_t * vnm = vxm->vnet_main; + vnet_interface_main_t * im = &vnm->interface_main; + u32 pkts_encapsulated = 0; + u16 old_l0 = 0, old_l1 = 0; + u32 cpu_index = os_get_cpu_number(); + u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + stats_sw_if_index = node->runtime_data[0]; + stats_n_packets = stats_n_bytes = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 flow_hash0, flow_hash1; + u32 next0 = VXLAN_ENCAP_NEXT_IP4_LOOKUP; + u32 next1 = VXLAN_ENCAP_NEXT_IP4_LOOKUP; + u32 sw_if_index0, sw_if_index1, len0, len1; + vnet_hw_interface_t * hi0, * hi1; + ip4_header_t * ip0, * ip1; + udp_header_t * udp0, * udp1; + u64 * copy_src0, * copy_dst0; + u64 * copy_src1, * copy_dst1; + u32 * copy_src_last0, * copy_dst_last0; + u32 * copy_src_last1, * copy_dst_last1; + vxlan_tunnel_t * t0, * t1; + u16 new_l0, new_l1; + ip_csum_t sum0, sum1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + flow_hash0 = vnet_l2_compute_flow_hash (b0); + flow_hash1 = vnet_l2_compute_flow_hash (b1); + + /* 1-wide cache? 
*/ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + + t0 = &vxm->tunnels[hi0->dev_instance]; + t1 = &vxm->tunnels[hi1->dev_instance]; + + /* Check rewrite string and drop packet if tunnel is deleted */ + if (PREDICT_FALSE(t0->rewrite == vxlan_dummy_rewrite)) + { + next0 = VXLAN_ENCAP_NEXT_DROP; + b0->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL]; + pkts_encapsulated --; + } /* Still go through normal encap with dummy rewrite */ + if (PREDICT_FALSE(t1->rewrite == vxlan_dummy_rewrite)) + { + next1 = VXLAN_ENCAP_NEXT_DROP; + b1->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL]; + pkts_encapsulated --; + } /* Still go through normal encap with dummy rewrite */ + + /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octects */ + ASSERT(vec_len(t0->rewrite) == 36); + ASSERT(vec_len(t1->rewrite) == 36); + + /* Apply the rewrite string. $$$$ vnet_rewrite? */ + vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); + vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite)); + + ip0 = vlib_buffer_get_current(b0); + ip1 = vlib_buffer_get_current(b1); + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip0; + copy_src0 = (u64 *) t0->rewrite; + copy_dst1 = (u64 *) ip1; + copy_src1 = (u64 *) t1->rewrite; + + /* Copy first 32 octets 8-bytes at a time */ +#define _(offs) copy_dst0[offs] = copy_src0[offs]; + foreach_fixed_header_offset; +#undef _ +#define _(offs) copy_dst1[offs] = copy_src1[offs]; + foreach_fixed_header_offset; +#undef _ + + /* Last 4 octets. Hopefully gcc will be our friend */ + copy_dst_last0 = (u32 *)(©_dst0[4]); + copy_src_last0 = (u32 *)(©_src0[4]); + copy_dst_last1 = (u32 *)(©_dst1[4]); + copy_src_last1 = (u32 *)(©_src1[4]); + + copy_dst_last0[0] = copy_src_last0[0]; + copy_dst_last1[0] = copy_src_last1[0]; + + /* fix the <bleep>ing outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + sum1 = ip1->checksum; + /* old_l1 always 0, see the rewrite setup */ + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + ip1->length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *)(ip0+1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + udp1 = (udp_header_t *)(ip1+1); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) + - sizeof (*ip1)); + + udp0->length = new_l0; + udp0->src_port = flow_hash0; + + udp1->length = new_l1; + udp1->src_port = flow_hash1; + + /* Reset to look up tunnel partner in the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index; + pkts_encapsulated += 2; + + len0 = vlib_buffer_length_in_chain (vm, b0); + len1 = vlib_buffer_length_in_chain (vm, b0); + stats_n_packets += 2; + stats_n_bytes += len0 + len1; + + /* Batch stats increment on the same vxlan tunnel so counter is not + incremented per packet. Note stats are still incremented for deleted + and admin-down tunnel where packets are dropped. 
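/*
 * Illustrative sketch (not part of the original diff): after the precomputed
 * rewrite is copied in front of the payload, only the outer IPv4 total
 * length changes (it is left as 0 in the rewrite), so the code above patches
 * the header checksum incrementally with ip_csum_update()/ip_csum_fold()
 * instead of recomputing it; the UDP source port is likewise set from the L2
 * flow hash, presumably to give ECMP/RSS some entropy.  The stand-alone
 * helper below shows the same RFC 1624 style update for a single 16-bit
 * field; it is a sketch, not VPP's implementation.
 */
#include <stdint.h>

/* Incrementally update a 16-bit one's-complement checksum when one 16-bit
 * header field changes from old_val to new_val (values in host order here,
 * for simplicity). */
static uint16_t
toy_csum_update16 (uint16_t csum, uint16_t old_val, uint16_t new_val)
{
  uint32_t sum = (uint16_t) ~csum;
  sum += (uint16_t) ~old_val;
  sum += new_val;
  while (sum >> 16)                      /* fold the carries back in */
    sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t) ~sum;
}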
It is not worthwhile + to check for this rare case and affect normal path performance. */ + if (PREDICT_FALSE ((sw_if_index0 != stats_sw_if_index) || + (sw_if_index0 != stats_sw_if_index))) + { + stats_n_packets -= 2; + stats_n_bytes -= len0 + len1; + if (sw_if_index0 == sw_if_index1) + { + if (stats_n_packets) + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, + cpu_index, stats_sw_if_index, + stats_n_packets, stats_n_bytes); + stats_sw_if_index = sw_if_index0; + stats_n_packets = 2; + stats_n_bytes = len0 + len1; + } + else + { + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, + cpu_index, sw_if_index0, 1, len0); + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, + cpu_index, sw_if_index1, 1, len1); + } + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_encap_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->tunnel_index = t0 - vxm->tunnels; + tr->vni = t0->vni; + } + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_encap_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->tunnel_index = t1 - vxm->tunnels; + tr->vni = t1->vni; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 flow_hash0; + u32 next0 = VXLAN_ENCAP_NEXT_IP4_LOOKUP; + u32 sw_if_index0, len0; + vnet_hw_interface_t * hi0; + ip4_header_t * ip0; + udp_header_t * udp0; + u64 * copy_src0, * copy_dst0; + u32 * copy_src_last0, * copy_dst_last0; + vxlan_tunnel_t * t0; + u16 new_l0; + ip_csum_t sum0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + flow_hash0 = vnet_l2_compute_flow_hash(b0); + + /* 1-wide cache? */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + t0 = &vxm->tunnels[hi0->dev_instance]; + + /* Check rewrite string and drop packet if tunnel is deleted */ + if (PREDICT_FALSE(t0->rewrite == vxlan_dummy_rewrite)) + { + next0 = VXLAN_ENCAP_NEXT_DROP; + b0->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL]; + pkts_encapsulated --; + } /* Still go through normal encap with dummy rewrite */ + + /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octects */ + ASSERT(vec_len(t0->rewrite) == 36); + + /* Apply the rewrite string. $$$$ vnet_rewrite? */ + vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); + + ip0 = vlib_buffer_get_current(b0); + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip0; + copy_src0 = (u64 *) t0->rewrite; + + /* Copy first 32 octets 8-bytes at a time */ +#define _(offs) copy_dst0[offs] = copy_src0[offs]; + foreach_fixed_header_offset; +#undef _ + /* Last 4 octets. 
Hopefully gcc will be our friend */ + copy_dst_last0 = (u32 *)(©_dst0[4]); + copy_src_last0 = (u32 *)(©_src0[4]); + + copy_dst_last0[0] = copy_src_last0[0]; + + /* fix the <bleep>ing outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *)(ip0+1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + + udp0->length = new_l0; + udp0->src_port = flow_hash0; + + /* Reset to look up tunnel partner in the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; + pkts_encapsulated ++; + + len0 = vlib_buffer_length_in_chain (vm, b0); + stats_n_packets += 1; + stats_n_bytes += len0; + + /* Batch stats increment on the same vxlan tunnel so counter is not + incremented per packet. Note stats are still incremented for deleted + and admin-down tunnel where packets are dropped. It is not worthwhile + to check for this rare case and affect normal path performance. */ + if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index)) + { + stats_n_packets -= 1; + stats_n_bytes -= len0; + if (stats_n_packets) + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, + cpu_index, stats_sw_if_index, + stats_n_packets, stats_n_bytes); + stats_n_packets = 1; + stats_n_bytes = len0; + stats_sw_if_index = sw_if_index0; + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_encap_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->tunnel_index = t0 - vxm->tunnels; + tr->vni = t0->vni; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Do we still need this now that tunnel tx stats is kept? */ + vlib_node_increment_counter (vm, node->node_index, + VXLAN_ENCAP_ERROR_ENCAPSULATED, + pkts_encapsulated); + + /* Increment any remaining batch stats */ + if (stats_n_packets) + { + vlib_increment_combined_counter + (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, + cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + node->runtime_data[0] = stats_sw_if_index; + } + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (vxlan_encap_node) = { + .function = vxlan_encap, + .name = "vxlan-encap", + .vector_size = sizeof (u32), + .format_trace = format_vxlan_encap_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(vxlan_encap_error_strings), + .error_strings = vxlan_encap_error_strings, + + .n_next_nodes = VXLAN_ENCAP_N_NEXT, + + .next_nodes = { + [VXLAN_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [VXLAN_ENCAP_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c new file mode 100644 index 00000000000..75cbc6c6499 --- /dev/null +++ b/vnet/vnet/vxlan/vxlan.c @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/vxlan/vxlan.h> + +vxlan_main_t vxlan_main; + +static u8 * format_decap_next (u8 * s, va_list * args) +{ + u32 next_index = va_arg (*args, u32); + + switch (next_index) + { + case VXLAN_INPUT_NEXT_DROP: + return format (s, "drop"); + case VXLAN_INPUT_NEXT_L2_INPUT: + return format (s, "l2"); + case VXLAN_INPUT_NEXT_IP4_INPUT: + return format (s, "ip4"); + case VXLAN_INPUT_NEXT_IP6_INPUT: + return format (s, "ip6"); + default: + return format (s, "unknown %d", next_index); + } + return s; +} + +u8 * format_vxlan_tunnel (u8 * s, va_list * args) +{ + vxlan_tunnel_t * t = va_arg (*args, vxlan_tunnel_t *); + vxlan_main_t * ngm = &vxlan_main; + + s = format (s, + "[%d] %U (src) %U (dst) vni %d encap_fib_index %d", + t - ngm->tunnels, + format_ip4_address, &t->src, + format_ip4_address, &t->dst, + t->vni, + t->encap_fib_index); + s = format (s, " decap_next %U\n", format_decap_next, t->decap_next_index); + return s; +} + +static u8 * format_vxlan_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "vxlan_tunnel%d", dev_instance); +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (vxlan_device_class,static) = { + .name = "VXLAN", + .format_device_name = format_vxlan_name, + .format_tx_trace = format_vxlan_encap_trace, + .tx_function = dummy_interface_tx, +}; + +static uword dummy_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + return 0; +} + +static u8 * format_vxlan_header_with_length (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + s = format (s, "unimplemented dev %u", dev_instance); + return s; +} + +VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = { + .name = "VXLAN", + .format_header = format_vxlan_header_with_length, + .set_rewrite = dummy_set_rewrite, +}; + +#define foreach_copy_field \ +_(src.as_u32) \ +_(dst.as_u32) \ +_(vni) \ +_(encap_fib_index) \ +_(decap_next_index) + +static int vxlan_rewrite (vxlan_tunnel_t * t) +{ + u8 *rw = 0; + ip4_header_t * ip0; + ip4_vxlan_header_t * h0; + int len = sizeof (*h0); + + vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES); + + h0 = (ip4_vxlan_header_t *) rw; + + /* Fixed portion of the (outer) ip4 header */ + ip0 = &h0->ip4; + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_UDP; + + /* we fix up the ip4 header length and checksum after-the-fact */ + ip0->src_address.as_u32 = t->src.as_u32; + ip0->dst_address.as_u32 = t->dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + + /* UDP header, randomize src port on something, maybe? 
*/ + h0->udp.src_port = clib_host_to_net_u16 (4789); + h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan); + + /* VXLAN header */ + vnet_set_vni_and_flags(&h0->vxlan, t->vni); + + t->rewrite = rw; + return (0); +} + +int vnet_vxlan_add_del_tunnel +(vnet_vxlan_add_del_tunnel_args_t *a, u32 * sw_if_indexp) +{ + vxlan_main_t * vxm = &vxlan_main; + vxlan_tunnel_t *t = 0; + vnet_main_t * vnm = vxm->vnet_main; + vnet_hw_interface_t * hi; + uword * p; + u32 hw_if_index = ~0; + u32 sw_if_index = ~0; + int rv; + vxlan_tunnel_key_t key; + + key.src = a->dst.as_u32; /* decap src in key is encap dst in config */ + key.vni = clib_host_to_net_u32 (a->vni << 8); + + p = hash_get (vxm->vxlan_tunnel_by_key, key.as_u64); + + if (a->is_add) + { + /* adding a tunnel: tunnel must not already exist */ + if (p) + return VNET_API_ERROR_TUNNEL_EXIST; + + if (a->decap_next_index == ~0) + a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT; + + if (a->decap_next_index >= VXLAN_INPUT_N_NEXT) + return VNET_API_ERROR_INVALID_DECAP_NEXT; + + pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES); + memset (t, 0, sizeof (*t)); + + /* copy from arg structure */ +#define _(x) t->x = a->x; + foreach_copy_field; +#undef _ + + rv = vxlan_rewrite (t); + + if (rv) + { + pool_put (vxm->tunnels, t); + return rv; + } + + hash_set (vxm->vxlan_tunnel_by_key, key.as_u64, t - vxm->tunnels); + + if (vec_len (vxm->free_vxlan_tunnel_hw_if_indices) > 0) + { + vnet_interface_main_t * im = &vnm->interface_main; + hw_if_index = vxm->free_vxlan_tunnel_hw_if_indices + [vec_len (vxm->free_vxlan_tunnel_hw_if_indices)-1]; + _vec_len (vxm->free_vxlan_tunnel_hw_if_indices) -= 1; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = t - vxm->tunnels; + hi->hw_instance = hi->dev_instance; + + /* clear old stats of freed tunnel before reuse */ + sw_if_index = hi->sw_if_index; + vnet_interface_counter_lock(im); + vlib_zero_combined_counter + (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], sw_if_index); + vlib_zero_combined_counter + (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], sw_if_index); + vlib_zero_simple_counter + (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index); + vnet_interface_counter_unlock(im); + } + else + { + hw_if_index = vnet_register_interface + (vnm, vxlan_device_class.index, t - vxm->tunnels, + vxlan_hw_class.index, t - vxm->tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->output_node_index = vxlan_encap_node.index; + } + + t->hw_if_index = hw_if_index; + t->sw_if_index = sw_if_index = hi->sw_if_index; + + if (a->decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT) + { + l2input_main_t * l2im = &l2input_main; + /* setup l2 input config with l2 feature and bd 0 to drop packet */ + vec_validate (l2im->configs, sw_if_index); + l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP; + l2im->configs[sw_if_index].bd_index = 0; + } + vnet_sw_interface_set_flags (vnm, sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + } + else + { + /* deleting a tunnel: tunnel must exist */ + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + t = pool_elt_at_index (vxm->tunnels, p[0]); + + vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */); + /* make sure tunnel is removed from l2 bd or xconnect */ + set_int_l2_mode(vxm->vlib_main, vnm, MODE_L3, t->sw_if_index, 0, 0, 0, 0); + vec_add1 (vxm->free_vxlan_tunnel_hw_if_indices, t->hw_if_index); + + hash_unset (vxm->vxlan_tunnel_by_key, key.as_u64); + + vec_free (t->rewrite); + t->rewrite = vxlan_dummy_rewrite; + pool_put 
(vxm->tunnels, t); + } + + if (sw_if_indexp) + *sw_if_indexp = sw_if_index; + + return 0; +} + +static u32 fib_index_from_fib_id (u32 fib_id) +{ + ip4_main_t * im = &ip4_main; + uword * p; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (!p) + return ~0; + + return p[0]; +} + +static uword unformat_decap_next (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 tmp; + + if (unformat (input, "l2")) + *result = VXLAN_INPUT_NEXT_L2_INPUT; + else if (unformat (input, "drop")) + *result = VXLAN_INPUT_NEXT_DROP; + else if (unformat (input, "ip4")) + *result = VXLAN_INPUT_NEXT_IP4_INPUT; + else if (unformat (input, "ip6")) + *result = VXLAN_INPUT_NEXT_IP6_INPUT; + else if (unformat (input, "%d", &tmp)) + *result = tmp; + else + return 0; + return 1; +} + +static clib_error_t * +vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t src, dst; + u8 is_add = 1; + u8 src_set = 0; + u8 dst_set = 0; + u32 encap_fib_index = 0; + u32 decap_next_index = ~0; + u32 vni = 0; + u32 tmp; + int rv; + vnet_vxlan_add_del_tunnel_args_t _a, * a = &_a; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "src %U", + unformat_ip4_address, &src)) + src_set = 1; + else if (unformat (line_input, "dst %U", + unformat_ip4_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "encap-vrf-id %d", &tmp)) + { + encap_fib_index = fib_index_from_fib_id (tmp); + if (encap_fib_index == ~0) + return clib_error_return (0, "nonexistent encap-vrf-id %d", tmp); + } + else if (unformat (line_input, "decap-next %U", unformat_decap_next, + &decap_next_index)) + ; + else if (unformat (line_input, "vni %d", &vni)) + { + if (vni >> 24) + return clib_error_return (0, "vni %d out of range", vni); + } + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (src_set == 0) + return clib_error_return (0, "tunnel src address not specified"); + + if (dst_set == 0) + return clib_error_return (0, "tunnel dst address not specified"); + + if (vni == 0) + return clib_error_return (0, "vni not specified"); + + memset (a, 0, sizeof (*a)); + + a->is_add = is_add; + +#define _(x) a->x = x; + foreach_copy_field; +#undef _ + + rv = vnet_vxlan_add_del_tunnel (a, 0 /* hw_if_indexp */); + + switch(rv) + { + case 0: + break; + case VNET_API_ERROR_INVALID_DECAP_NEXT: + return clib_error_return (0, "invalid decap-next..."); + + case VNET_API_ERROR_TUNNEL_EXIST: + return clib_error_return (0, "tunnel already exists..."); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel does not exist..."); + + default: + return clib_error_return + (0, "vnet_vxlan_add_del_tunnel returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = { + .path = "create vxlan tunnel", + .short_help = + "create vxlan tunnel src <local-vtep-addr> dst <remote-vtep-addr> vni <nn>" + " [encap-vrf-id <nn>] [decap-next [l2|ip4|ip6] [del]\n", + .function = vxlan_add_del_tunnel_command_fn, +}; + +static clib_error_t * +show_vxlan_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vxlan_main_t * vxm = &vxlan_main; + 
vxlan_tunnel_t * t; + + if (pool_elts (vxm->tunnels) == 0) + vlib_cli_output (vm, "No vxlan tunnels configured..."); + + pool_foreach (t, vxm->tunnels, + ({ + vlib_cli_output (vm, "%U", format_vxlan_tunnel, t); + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = { + .path = "show vxlan tunnel", + .function = show_vxlan_tunnel_command_fn, +}; + +clib_error_t *vxlan_init (vlib_main_t *vm) +{ + vxlan_main_t * vxm = &vxlan_main; + ip4_vxlan_header_t * hdr; + ip4_header_t * ip; + + vxm->vnet_main = vnet_get_main(); + vxm->vlib_main = vm; + + /* init dummy rewrite string for deleted vxlan tunnels */ + _vec_len(vxlan_dummy_rewrite) = sizeof(ip4_vxlan_header_t); + hdr = (ip4_vxlan_header_t *) vxlan_dummy_rewrite; + ip = &hdr->ip4; + /* minimal rewrite setup, see vxlan_rewrite() above as reference */ + ip->ip_version_and_header_length = 0x45; + ip->checksum = ip4_header_checksum (ip); + + udp_register_dst_port (vm, UDP_DST_PORT_vxlan, + vxlan_input_node.index, 1 /* is_ip4 */); + return 0; +} + +VLIB_INIT_FUNCTION(vxlan_init); + diff --git a/vnet/vnet/vxlan/vxlan.h b/vnet/vnet/vxlan/vxlan.h new file mode 100644 index 00000000000..5c82a3dfd9d --- /dev/null +++ b/vnet/vnet/vxlan/vxlan.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_vxlan_h +#define included_vnet_vxlan_h +
+#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_bd.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/vxlan/vxlan_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/udp.h> + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; /* 20 bytes */ + udp_header_t udp; /* 8 bytes */ + vxlan_header_t vxlan; /* 8 bytes */ +}) ip4_vxlan_header_t; + +typedef CLIB_PACKED(struct { + /* + * Key fields: ip src and vxlan vni on incoming VXLAN packet + * all fields in NET byte order + */ + union { + struct { + u32 src; + u32 vni; /* shifted left 8 bits */ + }; + u64 as_u64; + }; +}) vxlan_tunnel_key_t; + +typedef struct { + /* Rewrite string. 
$$$$ embed vnet_rewrite header */ + u8 * rewrite; + + /* decap next index */ + u32 decap_next_index; + + /* tunnel src and dst addresses */ + ip4_address_t src; + ip4_address_t dst; + + /* vxlan VNI in HOST byte order */ + u32 vni; + + /* L3 FIB index and L2 BD ID */ + u16 encap_fib_index; /* tunnel partner IP lookup here */ + + /* vnet intfc hw/sw_if_index */ + u16 hw_if_index; + u32 sw_if_index; +} vxlan_tunnel_t; + +#define foreach_vxlan_input_next \ +_(DROP, "error-drop") \ +_(L2_INPUT, "l2-input") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") + +typedef enum { +#define _(s,n) VXLAN_INPUT_NEXT_##s, + foreach_vxlan_input_next +#undef _ + VXLAN_INPUT_N_NEXT, +} vxlan_input_next_t; + +typedef enum { +#define vxlan_error(n,s) VXLAN_ERROR_##n, +#include <vnet/vxlan/vxlan_error.def> +#undef vxlan_error + VXLAN_N_ERROR, +} vxlan_input_error_t; + +typedef struct { + /* vector of encap tunnel instances */ + vxlan_tunnel_t *tunnels; + + /* lookup tunnel by key */ + uword * vxlan_tunnel_by_key; + + /* Free vlib hw_if_indices */ + u32 * free_vxlan_tunnel_hw_if_indices; + + /* Dummy rewrite for deleted vxlan_tunnels with hw_if_indices as above */ + u64 dummy_str [sizeof(ip4_vxlan_header_t)/sizeof(u64) + 2]; +#define vxlan_dummy_rewrite ((u8 *) &vxlan_main.dummy_str[1]) + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} vxlan_main_t; + +vxlan_main_t vxlan_main; + +vlib_node_registration_t vxlan_input_node; +vlib_node_registration_t vxlan_encap_node; + +u8 * format_vxlan_encap_trace (u8 * s, va_list * args); + +typedef struct { + u8 is_add; + ip4_address_t src, dst; + u32 encap_fib_index; + u32 decap_next_index; + u32 vni; +} vnet_vxlan_add_del_tunnel_args_t; + +int vnet_vxlan_add_del_tunnel +(vnet_vxlan_add_del_tunnel_args_t *a, u32 * sw_if_indexp); + +#endif /* included_vnet_vxlan_h */ diff --git a/vnet/vnet/vxlan/vxlan_error.def b/vnet/vnet/vxlan/vxlan_error.def new file mode 100644 index 00000000000..3ead986cca8 --- /dev/null +++ b/vnet/vnet/vxlan/vxlan_error.def @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +vxlan_error (DECAPSULATED, "good packets decapsulated") +vxlan_error (NO_SUCH_TUNNEL, "no such tunnel packets") diff --git a/vnet/vnet/vxlan/vxlan_packet.h b/vnet/vnet/vxlan/vxlan_packet.h new file mode 100644 index 00000000000..8a9a3b80532 --- /dev/null +++ b/vnet/vnet/vxlan/vxlan_packet.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_vxlan_packet_h__ +#define __included_vxlan_packet_h__ 1 + +/* + * From RFC 7348 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|I|R|R|R| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * VXLAN Header: This is an 8-byte field that has: + * + * - Flags (8 bits): where the I flag MUST be set to 1 for a valid + * VXLAN Network ID (VNI). The other 7 bits (designated "R") are + * reserved fields and MUST be set to zero on transmission and + * ignored on receipt. + * + * - VXLAN Segment ID/VXLAN Network Identifier (VNI): this is a + * 24-bit value used to designate the individual VXLAN overlay + * network on which the communicating VMs are situated. VMs in + * different VXLAN overlay networks cannot communicate with each + * other. + * + * - Reserved fields (24 bits and 8 bits): MUST be set to zero on + * transmission and ignored on receipt. + * + */ + +typedef struct { + u32 flags; + u32 vni_reserved; +} vxlan_header_t; + +#define VXLAN_FLAGS_VALID_HOST_BYTE_ORDER (1<<27) +#define VXLAN_FLAGS_VALID_NET_BYTE_ORDER (clib_host_to_net_u32(1<<27)) + +static inline u32 vnet_get_vni (vxlan_header_t * h) +{ + u32 vni_reserved_host_byte_order; + + vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved); + return vni_reserved_host_byte_order >> 8; +} + +static inline void vnet_set_vni_and_flags (vxlan_header_t * h, u32 vni) +{ + h->vni_reserved = clib_host_to_net_u32 (vni<<8); + h->flags = VXLAN_FLAGS_VALID_NET_BYTE_ORDER; +} + +#endif /* __included_vxlan_packet_h__ */
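The two inline helpers above capture the essential VXLAN header handling: the 24-bit VNI is carried shifted left by 8 bits in network byte order, and the I (valid-VNI) flag is the 0x08000000 bit of the flags word in host byte order. A standalone sketch of the same encode/decode round-trip, using plain <stdint.h> and htonl/ntohl on a POSIX host instead of the vppinfra byte-order macros (the demo_* names are illustrative only, not part of vnet):

/* Standalone illustration of the VNI handling done by
 * vnet_set_vni_and_flags() / vnet_get_vni() above. */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

typedef struct {
  uint32_t flags;         /* I flag = 0x08000000 in host byte order */
  uint32_t vni_reserved;  /* VNI in bits 31..8, low byte reserved */
} demo_vxlan_header_t;

static void demo_set_vni (demo_vxlan_header_t * h, uint32_t vni)
{
  h->vni_reserved = htonl (vni << 8); /* VNI shifted left 8 bits, net order */
  h->flags = htonl (1 << 27);         /* set the I (valid VNI) flag */
}

static uint32_t demo_get_vni (demo_vxlan_header_t * h)
{
  return ntohl (h->vni_reserved) >> 8; /* undo the shift after the byte swap */
}

int main (void)
{
  demo_vxlan_header_t h;
  demo_set_vni (&h, 13);
  printf ("vni %u, I flag %s\n", demo_get_vni (&h),
          (ntohl (h.flags) & (1 << 27)) ? "set" : "clear");
  return 0;
}

Compiled and run, this should print "vni 13, I flag set", matching the wire layout that vxlan_rewrite() builds via vnet_set_vni_and_flags() and that vxlan-input relies on when it reconstructs the tunnel key.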
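For completeness, based on the unformat logic in vxlan_add_del_tunnel_command_fn() above, a plausible CLI session for creating and deleting a tunnel might look like the following (the addresses, VRF id and VNI are example values; encap-vrf-id assumes the referenced IPv4 FIB already exists):

  create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7 decap-next l2
  show vxlan tunnel
  create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del

Note that the delete form still requires src, dst and vni, because the command handler rebuilds the lookup key (destination address plus shifted VNI) before calling vnet_vxlan_add_del_tunnel().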